evoasm 0.0.2.pre7 → 0.1.0.pre2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (168) hide show
  1. checksums.yaml +4 -4
  2. data/.gdbinit +41 -0
  3. data/.gitignore +1 -2
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +8 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.md +660 -0
  8. data/Makefile +1 -1
  9. data/README.md +17 -9
  10. data/Rakefile +39 -107
  11. data/bin/gdb +1 -1
  12. data/bin/gdb_loop +4 -0
  13. data/docs/FindingInstructions.md +17 -0
  14. data/docs/JIT.md +14 -0
  15. data/docs/SymbolicRegression.md +102 -0
  16. data/docs/Visualization.md +29 -0
  17. data/docs/examples/bit_insts.rb +44 -0
  18. data/docs/examples/jit.rb +26 -0
  19. data/docs/examples/loss.gif +0 -0
  20. data/docs/examples/program.png +0 -0
  21. data/docs/examples/sym_reg.rb +64 -0
  22. data/docs/examples/vis.rb +38 -0
  23. data/evoasm.gemspec +21 -15
  24. data/ext/evoasm_ext/Rakefile +3 -0
  25. data/ext/evoasm_ext/compile.rake +35 -0
  26. data/ext/evoasm_ext/libevoasm/src/evoasm-alloc.c +226 -0
  27. data/ext/evoasm_ext/libevoasm/src/evoasm-alloc.h +84 -0
  28. data/ext/evoasm_ext/libevoasm/src/evoasm-arch.c +52 -0
  29. data/ext/evoasm_ext/libevoasm/src/evoasm-arch.h +101 -0
  30. data/ext/evoasm_ext/libevoasm/src/evoasm-bitmap.h +158 -0
  31. data/ext/evoasm_ext/libevoasm/src/evoasm-buf.c +204 -0
  32. data/ext/evoasm_ext/libevoasm/src/evoasm-buf.h +109 -0
  33. data/ext/evoasm_ext/libevoasm/src/evoasm-domain.c +124 -0
  34. data/ext/evoasm_ext/libevoasm/src/evoasm-domain.h +279 -0
  35. data/ext/evoasm_ext/libevoasm/src/evoasm-error.c +65 -0
  36. data/ext/evoasm_ext/libevoasm/src/evoasm-error.h +108 -0
  37. data/ext/evoasm_ext/{evoasm-log.c → libevoasm/src/evoasm-log.c} +36 -18
  38. data/ext/evoasm_ext/libevoasm/src/evoasm-log.h +93 -0
  39. data/ext/evoasm_ext/libevoasm/src/evoasm-param.c +22 -0
  40. data/ext/evoasm_ext/libevoasm/src/evoasm-param.h +33 -0
  41. data/ext/evoasm_ext/libevoasm/src/evoasm-pop-params.c +192 -0
  42. data/ext/evoasm_ext/libevoasm/src/evoasm-pop-params.h +60 -0
  43. data/ext/evoasm_ext/libevoasm/src/evoasm-pop.c +1323 -0
  44. data/ext/evoasm_ext/libevoasm/src/evoasm-pop.h +107 -0
  45. data/ext/evoasm_ext/libevoasm/src/evoasm-program-io.c +116 -0
  46. data/ext/evoasm_ext/libevoasm/src/evoasm-program-io.h +60 -0
  47. data/ext/evoasm_ext/libevoasm/src/evoasm-program.c +1827 -0
  48. data/ext/evoasm_ext/libevoasm/src/evoasm-program.h +167 -0
  49. data/ext/evoasm_ext/libevoasm/src/evoasm-rand.c +65 -0
  50. data/ext/evoasm_ext/libevoasm/src/evoasm-rand.h +76 -0
  51. data/ext/evoasm_ext/libevoasm/src/evoasm-signal.c +106 -0
  52. data/ext/evoasm_ext/libevoasm/src/evoasm-signal.h +58 -0
  53. data/ext/evoasm_ext/libevoasm/src/evoasm-util.h +112 -0
  54. data/ext/evoasm_ext/libevoasm/src/evoasm-x64.c +925 -0
  55. data/ext/evoasm_ext/libevoasm/src/evoasm-x64.h +277 -0
  56. data/ext/evoasm_ext/libevoasm/src/evoasm.c +28 -0
  57. data/ext/evoasm_ext/libevoasm/src/evoasm.h +35 -0
  58. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-enums.h +2077 -0
  59. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-insts.c +191203 -0
  60. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-insts.h +1713 -0
  61. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-misc.c +348 -0
  62. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-misc.h +93 -0
  63. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-params.c +51 -0
  64. data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-params.h +509 -0
  65. data/lib/evoasm.rb +28 -11
  66. data/lib/evoasm/buffer.rb +105 -0
  67. data/lib/evoasm/capstone.rb +100 -0
  68. data/lib/evoasm/domain.rb +116 -0
  69. data/lib/evoasm/error.rb +37 -16
  70. data/lib/evoasm/exception_error.rb +19 -0
  71. data/lib/evoasm/ffi_ext.rb +53 -0
  72. data/lib/evoasm/libevoasm.rb +286 -0
  73. data/lib/evoasm/libevoasm/x64_enums.rb +1967 -0
  74. data/lib/evoasm/parameter.rb +20 -0
  75. data/lib/evoasm/population.rb +145 -0
  76. data/lib/evoasm/population/parameters.rb +227 -0
  77. data/lib/evoasm/population/plotter.rb +89 -0
  78. data/lib/evoasm/prng.rb +64 -0
  79. data/lib/evoasm/program.rb +195 -12
  80. data/lib/evoasm/program/io.rb +144 -0
  81. data/lib/evoasm/test.rb +8 -0
  82. data/lib/evoasm/version.rb +1 -1
  83. data/lib/evoasm/x64.rb +115 -0
  84. data/lib/evoasm/x64/cpu_state.rb +95 -0
  85. data/lib/evoasm/x64/instruction.rb +109 -0
  86. data/lib/evoasm/x64/operand.rb +156 -0
  87. data/lib/evoasm/x64/parameters.rb +211 -0
  88. data/test/helpers/population_helper.rb +128 -0
  89. data/test/helpers/test_helper.rb +1 -0
  90. data/test/helpers/x64_helper.rb +24 -0
  91. data/test/integration/bitwise_reverse_test.rb +41 -0
  92. data/test/integration/gcd_test.rb +52 -0
  93. data/test/integration/popcnt_test.rb +46 -0
  94. data/test/integration/sym_reg_test.rb +68 -0
  95. data/test/unit/evoasm/buffer_test.rb +48 -0
  96. data/test/unit/evoasm/capstone_test.rb +18 -0
  97. data/test/unit/evoasm/domain_test.rb +55 -0
  98. data/test/unit/evoasm/population/parameters_test.rb +106 -0
  99. data/test/unit/evoasm/population_test.rb +96 -0
  100. data/test/unit/evoasm/prng_test.rb +47 -0
  101. data/test/unit/evoasm/x64/cpu_state_test.rb +73 -0
  102. data/test/unit/evoasm/x64/encoding_test.rb +320 -0
  103. data/test/unit/evoasm/x64/instruction_access_test.rb +177 -0
  104. data/test/unit/evoasm/x64/instruction_encoding_test.rb +780 -0
  105. data/test/unit/evoasm/x64/instruction_test.rb +62 -0
  106. data/test/unit/evoasm/x64/parameters_test.rb +65 -0
  107. data/test/unit/evoasm/x64_test.rb +52 -0
  108. metadata +195 -89
  109. data/Gemfile.rake +0 -8
  110. data/Gemfile.rake.lock +0 -51
  111. data/LICENSE.txt +0 -373
  112. data/data/tables/README.md +0 -19
  113. data/data/tables/x64.csv +0 -1684
  114. data/data/templates/evoasm-x64.c.erb +0 -319
  115. data/data/templates/evoasm-x64.h.erb +0 -126
  116. data/examples/abs.yml +0 -20
  117. data/examples/popcnt.yml +0 -17
  118. data/examples/sym_reg.yml +0 -26
  119. data/exe/evoasm-search +0 -13
  120. data/ext/evoasm_ext/evoasm-alloc.c +0 -145
  121. data/ext/evoasm_ext/evoasm-alloc.h +0 -59
  122. data/ext/evoasm_ext/evoasm-arch.c +0 -44
  123. data/ext/evoasm_ext/evoasm-arch.h +0 -161
  124. data/ext/evoasm_ext/evoasm-bitmap.h +0 -114
  125. data/ext/evoasm_ext/evoasm-buf.c +0 -130
  126. data/ext/evoasm_ext/evoasm-buf.h +0 -47
  127. data/ext/evoasm_ext/evoasm-error.c +0 -31
  128. data/ext/evoasm_ext/evoasm-error.h +0 -75
  129. data/ext/evoasm_ext/evoasm-free-list.c.tmpl +0 -121
  130. data/ext/evoasm_ext/evoasm-free-list.h.tmpl +0 -86
  131. data/ext/evoasm_ext/evoasm-log.h +0 -69
  132. data/ext/evoasm_ext/evoasm-misc.c +0 -23
  133. data/ext/evoasm_ext/evoasm-misc.h +0 -282
  134. data/ext/evoasm_ext/evoasm-param.h +0 -37
  135. data/ext/evoasm_ext/evoasm-search.c +0 -2145
  136. data/ext/evoasm_ext/evoasm-search.h +0 -214
  137. data/ext/evoasm_ext/evoasm-util.h +0 -40
  138. data/ext/evoasm_ext/evoasm-x64.c +0 -275624
  139. data/ext/evoasm_ext/evoasm-x64.h +0 -5436
  140. data/ext/evoasm_ext/evoasm.c +0 -7
  141. data/ext/evoasm_ext/evoasm.h +0 -23
  142. data/ext/evoasm_ext/evoasm_ext.c +0 -1757
  143. data/ext/evoasm_ext/extconf.rb +0 -31
  144. data/lib/evoasm/cli.rb +0 -6
  145. data/lib/evoasm/cli/search.rb +0 -127
  146. data/lib/evoasm/core_ext.rb +0 -1
  147. data/lib/evoasm/core_ext/array.rb +0 -9
  148. data/lib/evoasm/core_ext/integer.rb +0 -10
  149. data/lib/evoasm/core_ext/kwstruct.rb +0 -13
  150. data/lib/evoasm/core_ext/range.rb +0 -5
  151. data/lib/evoasm/examples.rb +0 -27
  152. data/lib/evoasm/gen.rb +0 -8
  153. data/lib/evoasm/gen/enum.rb +0 -169
  154. data/lib/evoasm/gen/name_util.rb +0 -80
  155. data/lib/evoasm/gen/state.rb +0 -176
  156. data/lib/evoasm/gen/state_dsl.rb +0 -152
  157. data/lib/evoasm/gen/strio.rb +0 -27
  158. data/lib/evoasm/gen/translator.rb +0 -1102
  159. data/lib/evoasm/gen/version.rb +0 -5
  160. data/lib/evoasm/gen/x64.rb +0 -237
  161. data/lib/evoasm/gen/x64/funcs.rb +0 -495
  162. data/lib/evoasm/gen/x64/inst.rb +0 -781
  163. data/lib/evoasm/search.rb +0 -40
  164. data/lib/evoasm/tasks/gen_task.rb +0 -86
  165. data/lib/evoasm/tasks/template_task.rb +0 -52
  166. data/test/test_helper.rb +0 -1
  167. data/test/x64/test_helper.rb +0 -19
  168. data/test/x64/x64_test.rb +0 -87
@@ -0,0 +1,107 @@
1
+ /*
2
+ * Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
3
+ *
4
+ * This program is free software: you can redistribute it and/or modify
5
+ * it under the terms of the GNU Affero General Public License as published by
6
+ * the Free Software Foundation, either version 3 of the License, or
7
+ * (at your option) any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ */
17
+
18
+ #pragma once
19
+
20
+ #include <stdalign.h>
21
+ #include "evoasm-error.h"
22
+ #include "evoasm-pop-params.h"
23
+ /* Loss bookkeeping: an array of loss samples and an array of 8-bit counters (indexing scheme is not visible in this header). */
24
+ typedef struct {
25
+ evoasm_loss_t *samples;
26
+ uint8_t *counters;
27
+ } evoasm_pop_loss_data_t;
28
+ /* Jump data for programs: signed 16-bit jump offsets and 8-bit jump conditions. */
29
+ typedef struct {
30
+ int16_t *jmp_offs;
31
+ uint8_t *jmp_cond;
32
+ } evoasm_pop_program_data_t;
33
+ /* Module-level data: pheromone values, 16-bit sizes and the embedded program jump data. */
34
+ typedef struct {
35
+ float *pheromones;
36
+ uint16_t *sizes;
37
+ evoasm_pop_program_data_t program_data;
38
+ } evoasm_pop_module_data_t;
39
+ /* Kernel data: instruction ids plus a parameter pointer that can be viewed as x64 basic params or as an untyped pointer. */
40
+ typedef struct {
41
+ evoasm_inst_id_t *insts;
42
+ union {
43
+ evoasm_x64_basic_params_t *x64;
44
+ void *data; /* same pointer, architecture-agnostic view */
45
+ } params;
46
+ } evoasm_pop_kernel_data_t;
47
+ /* State of one deme: its own PRNG, blessed/doomed individual index arrays, parent/current/best program and kernel data, loss data and error counters. */
48
+ struct evoasm_deme_s {
49
+ evoasm_prng_t prng;
50
+ uint16_t *blessed_indiv_idxs;
51
+ uint16_t *doomed_indiv_idxs;
52
+ evoasm_pop_program_data_t parent_program_data;
53
+ evoasm_pop_kernel_data_t parent_kernel_data;
54
+ evoasm_program_t program;
55
+ uint64_t *error_counters;
56
+ uint64_t error_counter;
57
+ evoasm_pop_loss_data_t loss_data;
58
+ evoasm_pop_program_data_t program_data;
59
+ evoasm_pop_kernel_data_t kernel_data;
60
+ evoasm_loss_t *top_losses;
61
+
62
+ evoasm_loss_t best_loss;
63
+ evoasm_pop_program_data_t best_program_data;
64
+ evoasm_pop_kernel_data_t best_kernel_data;
65
+
66
+ uint16_t n_doomed_indivs;
67
+ uint16_t n_blessed_indivs;
68
+ uint16_t n_examples;
69
+ evoasm_arch_id_t arch_id;
70
+ evoasm_pop_params_t *params; /* same field names as in evoasm_pop_t; presumably shared with the owning population - confirm */
71
+ evoasm_domain_t *domains;
72
+ } evoasm_aligned(EVOASM_CACHE_LINE_SIZE) ; /* the struct is aligned to EVOASM_CACHE_LINE_SIZE */
73
+
74
+ typedef struct evoasm_deme_s evoasm_deme_t;
75
+ /* A population: parameters, domains, the deme array, module data, a one-bit seeded flag and summary losses. */
76
+ typedef struct evoasm_pop_s {
77
+ evoasm_pop_params_t *params;
78
+ evoasm_domain_t *domains;
79
+ evoasm_deme_t *demes;
80
+ evoasm_pop_module_data_t module_data;
81
+ bool seeded : 1; /* presumably set once evoasm_pop_seed() has run - confirm in evoasm-pop.c */
82
+ evoasm_loss_t *summary_losses;
83
+
84
+ } evoasm_pop_t;
85
+ /* Initializes pop for architecture arch_id with the given params; the evoasm_success_t result reports success or failure. */
86
+ evoasm_success_t
87
+ evoasm_pop_init(evoasm_pop_t *pop,
88
+ evoasm_arch_id_t arch_id,
89
+ evoasm_pop_params_t *params);
90
+
91
+ /* Evaluates the population (definition not visible here); can fail. */
92
+ evoasm_success_t
93
+ evoasm_pop_eval(evoasm_pop_t *pop);
94
+ /* Advances the population to the next generation (definition not visible here); no status is returned. */
95
+ void
96
+ evoasm_pop_next_gen(evoasm_pop_t *pop);
97
+
98
+ /* Seeds the population (definition not visible here); can fail. */
99
+ evoasm_success_t
100
+ evoasm_pop_seed(evoasm_pop_t *pop);
101
+ /* Releases what pop owns (definition not visible here). */
102
+ void
103
+ evoasm_pop_destroy(evoasm_pop_t *pop);
104
+
105
+ //void
106
+ //evoasm_pop_inject(evoasm_pop_t *pop, evoasm_indiv_t *indiv, size_t indiv_size, evoasm_loss_t loss);
107
+
@@ -0,0 +1,116 @@
1
+ /*
2
+ * Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
3
+ *
4
+ * This program is free software: you can redistribute it and/or modify
5
+ * it under the terms of the GNU Affero General Public License as published by
6
+ * the Free Software Foundation, either version 3 of the License, or
7
+ * (at your option) any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ */
17
+
18
+ #include "evoasm-alloc.h"
19
+ #include "evoasm-program-io.h"
20
+ #include <stdarg.h>
21
+ /* Display names indexed by evoasm_program_io_val_type_t (I64, U64, F64); used in the type-mismatch error message of evoasm_program_io_init(). */
22
+ static const char *const _evoasm_example_type_names[] = {
23
+ "i64",
24
+ "u64",
25
+ "f64"
26
+ };
27
+ /* Allocates a program_io with room for len values after the header and records len (truncated to 16 bits). Returns NULL if the allocation fails. */
28
+ evoasm_program_io_t *
29
+ evoasm_program_io_alloc(size_t len) {
30
+ evoasm_program_io_t *program_io = evoasm_malloc(sizeof(evoasm_program_io_t) + len * sizeof(evoasm_program_io_val_t));
31
+ if(program_io) { program_io->len = (uint16_t) len; } /* do not dereference a failed allocation */
32
+
33
+ return program_io;
34
+ }
35
+ /* Fills program_io->vals from varargs passed as (type, value) pairs, one pair per value; column i % arity must keep the same type in every tuple. Returns false after evoasm_error() on failure. */
36
+ evoasm_success_t
37
+ evoasm_program_io_init(evoasm_program_io_t *program_io, size_t arity, ...) {
38
+ va_list args;
39
+ bool retval = true;
40
+
41
+ if(arity > EVOASM_PROGRAM_IO_MAX_ARITY) {
42
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
43
+ "Maximum arity exceeded (%zu > %d)", arity, EVOASM_PROGRAM_IO_MAX_ARITY);
44
+ retval = false;
45
+ return retval; /* args has not been va_start()ed yet, so the va_end() at done: must not run */
46
+ }
47
+
48
+ program_io->arity = (uint8_t) arity;
49
+
50
+ va_start(args, arity);
51
+ for(size_t i = 0; i < program_io->len; i++) {
52
+ size_t type_idx = i % arity; /* NOTE(review): arity == 0 with len > 0 divides by zero here */
53
+ evoasm_program_io_val_type_t type = va_arg(args, evoasm_program_io_val_type_t);
54
+ evoasm_program_io_val_t val;
55
+ switch(type) {
56
+ case EVOASM_PROGRAM_IO_VAL_TYPE_F64:
57
+ val.f64 = va_arg(args, double);
58
+ break;
59
+ case EVOASM_PROGRAM_IO_VAL_TYPE_I64:
60
+ val.i64 = va_arg(args, int64_t);
61
+ break;
62
+ case EVOASM_PROGRAM_IO_VAL_TYPE_U64:
63
+ val.u64 = va_arg(args, uint64_t);
64
+ break;
65
+ default:
66
+ evoasm_assert_not_reached();
67
+ }
68
+
69
+ program_io->vals[i] = val;
70
+
71
+ if(i >= arity) { /* from the second tuple on, compare against the type recorded for this column */
72
+ evoasm_program_io_val_type_t prev_type = program_io->types[type_idx];
73
+
74
+ if(prev_type != type) {
75
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
76
+ "Example value type mismatch (previously %s, now %s)",
77
+ _evoasm_example_type_names[prev_type], _evoasm_example_type_names[type]);
78
+ retval = false;
79
+ goto done;
80
+ }
81
+ }
82
+ program_io->types[type_idx] = type;
83
+ }
84
+
85
+
86
+ done:
87
+ va_end(args);
88
+ return retval;
89
+ }
90
+ /* Returns the value at idx read through the f64 union member; no bounds or type check is performed. */
91
+ double
92
+ evoasm_program_io_get_value_f64(evoasm_program_io_t *program_io, size_t idx) {
93
+ return program_io->vals[idx].f64;
94
+ }
95
+ /* Returns the value at idx read through the i64 union member; no bounds or type check is performed. */
96
+ int64_t
97
+ evoasm_program_io_get_value_i64(evoasm_program_io_t *program_io, size_t idx) {
98
+ return program_io->vals[idx].i64;
99
+ }
100
+ /* Currently a no-op: the body is empty and program_io is left untouched. */
101
+ void
102
+ evoasm_program_io_destroy(evoasm_program_io_t *program_io) {
103
+
104
+ }
105
+ /* Returns the type of the value at idx; types are stored once per column, hence the modulo by arity (arity must be non-zero). */
106
+ evoasm_program_io_val_type_t
107
+ evoasm_program_io_get_type(evoasm_program_io_t *program_io, size_t idx) {
108
+ return program_io->types[idx % program_io->arity];
109
+ }
110
+ /* Macro-generated helpers; presumably a free function plus arity/len getters returning size_t - confirm against the macro definitions, which are not visible here. */
111
+ EVOASM_DEF_FREE_FUNC(program_io)
112
+
113
+ EVOASM_DEF_GETTER(program_io, arity, size_t)
114
+
115
+ EVOASM_DEF_GETTER(program_io, len, size_t)
116
+
@@ -0,0 +1,60 @@
1
+ /*
2
+ * Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
3
+ *
4
+ * This program is free software: you can redistribute it and/or modify
5
+ * it under the terms of the GNU Affero General Public License as published by
6
+ * the Free Software Foundation, either version 3 of the License, or
7
+ * (at your option) any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ */
17
+
18
+ #pragma once
19
+
20
+ #include <stdint.h>
21
+ /* Largest arity accepted by evoasm_program_io_init(); also the capacity of evoasm_program_io_t.types. */
22
+ #define EVOASM_PROGRAM_IO_MAX_ARITY 8
23
+ /* Tag selecting which member of evoasm_program_io_val_t is meaningful. */
24
+ typedef enum {
25
+ EVOASM_PROGRAM_IO_VAL_TYPE_I64,
26
+ EVOASM_PROGRAM_IO_VAL_TYPE_U64,
27
+ EVOASM_PROGRAM_IO_VAL_TYPE_F64,
28
+ } evoasm_program_io_val_type_t;
29
+ /* One 64-bit input/output value, viewed as double, signed or unsigned integer. */
30
+ typedef union {
31
+ double f64;
32
+ int64_t i64;
33
+ uint64_t u64;
34
+ } evoasm_program_io_val_t;
35
+ /* Flat list of len values grouped into tuples of arity values, with one type per tuple column. */
36
+ typedef struct {
37
+ uint8_t arity;
38
+ uint16_t len;
39
+ evoasm_program_io_val_type_t types[EVOASM_PROGRAM_IO_MAX_ARITY];
40
+ evoasm_program_io_val_t vals[1]; /* trailing array; evoasm_program_io_alloc() allocates len further elements after the struct */
41
+ } evoasm_program_io_t;
42
+ /* Program inputs and outputs share the program_io representation and its arity limit. */
43
+ #define EVOASM_PROGRAM_OUTPUT_MAX_ARITY EVOASM_PROGRAM_IO_MAX_ARITY
44
+ #define EVOASM_PROGRAM_INPUT_MAX_ARITY EVOASM_PROGRAM_IO_MAX_ARITY
45
+
46
+ typedef evoasm_program_io_t evoasm_program_output_t;
47
+ typedef evoasm_program_io_t evoasm_program_input_t;
48
+ /* Number of complete tuples: len / arity using integer division; arity must be non-zero. */
49
+ #define EVOASM_PROGRAM_IO_N_EXAMPLES(program_io) ((size_t)((program_io)->len / (program_io)->arity))
50
+ #define EVOASM_PROGRAM_INPUT_N_TUPLES(program_input) EVOASM_PROGRAM_IO_N_EXAMPLES((evoasm_program_io_t *)program_input)
51
+ #define EVOASM_PROGRAM_OUTPUT_N_TUPLES(program_output) EVOASM_PROGRAM_IO_N_EXAMPLES((evoasm_program_io_t *)program_output)
52
+ /* Allocates a program_io with room for len values. */
53
+ evoasm_program_io_t *
54
+ evoasm_program_io_alloc(size_t len);
55
+ /* Counterpart to initialization; its definition in evoasm-program-io.c has an empty body. */
56
+ void
57
+ evoasm_program_io_destroy(evoasm_program_io_t *program_io);
58
+ /* Destroys a program output; evoasm_program_output_t is a plain alias of evoasm_program_io_t, so this forwards to evoasm_program_io_destroy(). */
59
+ #define evoasm_program_output_destroy(program_output) \
60
+ evoasm_program_io_destroy((evoasm_program_io_t *)(program_output))
@@ -0,0 +1,1827 @@
1
+ /*
2
+ * Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
3
+ *
4
+ * This program is free software: you can redistribute it and/or modify
5
+ * it under the terms of the GNU Affero General Public License as published by
6
+ * the Free Software Foundation, either version 3 of the License, or
7
+ * (at your option) any later version.
8
+ *
9
+ * This program is distributed in the hope that it will be useful,
10
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ * GNU Affero General Public License for more details.
13
+ *
14
+ * You should have received a copy of the GNU Affero General Public License
15
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ */
17
+
18
+ #include "evoasm-signal.h"
19
+ #include "evoasm-program.h"
20
+ #include "evoasm-arch.h"
21
+ #include "evoasm.h"
22
+ #include "evoasm-x64.h"
23
+ #include "evoasm-param.h"
24
+ #include "evoasm-program-io.h"
25
+
26
+
27
+ EVOASM_DEF_LOG_TAG("program")
28
+ /* Converts an io value to double according to its type tag: F64 is returned as is, I64 is cast. */
29
+ static inline double
30
+ evoasm_program_io_val_to_dbl(evoasm_program_io_val_t io_val, evoasm_program_io_val_type_t io_val_type) {
31
+ switch(io_val_type) {
32
+ case EVOASM_PROGRAM_IO_VAL_TYPE_F64:
33
+ return io_val.f64;
34
+ case EVOASM_PROGRAM_IO_VAL_TYPE_I64:
35
+ return (double) io_val.i64;
36
+ default: /* NOTE(review): EVOASM_PROGRAM_IO_VAL_TYPE_U64 also ends up here and is treated as unsupported */
37
+ evoasm_log_fatal("unsupported input/output value type %d", io_val_type);
38
+ evoasm_assert_not_reached();
39
+ }
40
+ }
41
+ /* Frees the memory owned by program and destroys its buffers. Returns false if destroying either buffer failed. */
42
+ bool
43
+ evoasm_program_destroy(evoasm_program_t *program) {
44
+
45
+ bool retval = true;
46
+
47
+ if(!program->shallow) { /* per-kernel instruction and parameter arrays are freed only for non-shallow programs */
48
+ for(size_t i = 0; i < program->size; i++) {
49
+ evoasm_kernel_t *kernel = &program->kernels[i];
50
+ evoasm_free(kernel->insts);
51
+ switch(program->arch_info->id) {
52
+ case EVOASM_ARCH_X64:
53
+ evoasm_free(kernel->params.x64);
54
+ break;
55
+ default:
56
+ evoasm_assert_not_reached();
57
+ }
58
+ }
59
+
60
+ }
61
+
62
+ evoasm_free(program->jmp_offs);
63
+ evoasm_free(program->jmp_conds);
64
+ evoasm_free(program->kernels);
65
+ evoasm_free(program->recur_counters);
66
+ evoasm_free(program->output_vals);
67
+
68
+ if(program->buf) { /* both buffers are attempted even if the first destroy fails */
69
+ if(!evoasm_buf_destroy(program->buf)) {
70
+ retval = false;
71
+ }
72
+ }
73
+
74
+ if(program->body_buf) {
75
+ if(!evoasm_buf_destroy(program->body_buf)) {
76
+ retval = false;
77
+ }
78
+ }
79
+
80
+ return retval;
81
+ }
82
+
83
+ #if 0
84
+ evoasm_success_t
85
+ evoasm_program_clone(evoasm_program_t *program, evoasm_program_t *cloned_program) {
86
+ size_t i = 0;
87
+
88
+ *cloned_program = *program;
89
+ cloned_program->reset_rflags = false;
90
+ cloned_program->_input.len = 0;
91
+ cloned_program->_output.len = 0;
92
+ cloned_program->output_vals = NULL;
93
+ cloned_program->buf = NULL;
94
+ cloned_program->body_buf = NULL;
95
+
96
+ /* memory addresses in original buffer point to memory in original program,
97
+ * we need to reemit assembly, this is done in a lazy fashion */
98
+ cloned_program->need_emit = true;
99
+
100
+ EVOASM_TRY(error, evoasm_buf_clone, program->buf, &cloned_program->_buf);
101
+ cloned_program->buf = &cloned_program->_buf;
102
+ EVOASM_TRY(error, evoasm_buf_clone, program->body_buf, &cloned_program->_body_buf);
103
+ cloned_program->body_buf = &cloned_program->_body_buf;
104
+
105
+ size_t program_params_size = sizeof(evoasm_program_params_t);
106
+ cloned_program->params = evoasm_malloc(program_params_size);
107
+
108
+ if(!cloned_program->params) {
109
+ goto error;
110
+ }
111
+
112
+ memcpy(cloned_program->params, program->params, program_params_size);
113
+
114
+ for(; i < program->size; i++) {
115
+ evoasm_kernel_t *orig_kernel = &program->kernels[i];
116
+ evoasm_kernel_t *cloned_kernel = &cloned_program->kernels[i];
117
+ *cloned_kernel = *orig_kernel;
118
+
119
+ size_t params_size =
120
+ sizeof(evoasm_kernel_params_t) + orig_kernel->size * sizeof(evoasm_kernel_param_t);
121
+ cloned_kernel->params = evoasm_malloc(params_size);
122
+ if(!cloned_kernel->params) {
123
+ goto error;
124
+ }
125
+ memcpy(cloned_kernel->params, orig_kernel->params, params_size);
126
+ }
127
+
128
+ return true;
129
+
130
+ error:
131
+ (void) evoasm_program_destroy_(cloned_program, i);
132
+ return false;
133
+ }
134
+ #endif
135
+ /* Returns program->body_buf when body is true, otherwise program->buf. */
136
+ evoasm_buf_t *
137
+ evoasm_program_get_buf(evoasm_program_t *program, bool body) {
138
+ if(body) {
139
+ return program->body_buf;
140
+ } else {
141
+ return program->buf;
142
+ }
143
+ }
144
+ /* Returns program->size, the bound used when iterating program->kernels. */
145
+ size_t
146
+ evoasm_program_get_size(evoasm_program_t *program) {
147
+ return program->size;
148
+ }
149
+ /* Points *code at the bytes of kernel kernel_idx inside the body buffer and returns their length (buf_end - buf_start). */
150
+ size_t
151
+ evoasm_program_get_kernel_code(evoasm_program_t *program, size_t kernel_idx, const uint8_t **code) {
152
+ evoasm_kernel_t *kernel = &program->kernels[kernel_idx];
153
+ size_t len = (size_t) kernel->buf_end - kernel->buf_start; /* the cast binds to buf_end only */
154
+ *code = program->body_buf->data + kernel->buf_start;
155
+ return len;
156
+ }
157
+ /* Points *code at the start of program->buf (frame true) or program->body_buf (frame false) and returns that buffer's pos. */
158
+ size_t
159
+ evoasm_program_get_code(evoasm_program_t *program, bool frame, const uint8_t **code) {
160
+ evoasm_buf_t *buf;
161
+ if(frame) {
162
+ buf = program->buf;
163
+ } else {
164
+ buf = program->body_buf;
165
+ }
166
+ *code = buf->data;
167
+ return buf->pos;
168
+ }
169
+
170
+ /* Returns program->jmp_offs[pos] as an int; pos is not bounds-checked. */
171
+ int
172
+ evoasm_program_get_jmp_off(evoasm_program_t *program, size_t pos) {
173
+ return program->jmp_offs[pos];
174
+ }
175
+
176
+ /* Returns the input flag recorded for reg_id in kernel kernel_idx (x64 only). */
177
+ bool
178
+ evoasm_program_is_input_reg(evoasm_program_t *program, size_t kernel_idx, evoasm_reg_id_t reg_id) {
179
+ evoasm_kernel_t *kernel = &program->kernels[kernel_idx];
180
+ switch(program->arch_info->id) {
181
+ case EVOASM_ARCH_X64:
182
+ return kernel->reg_info.x64.regs[reg_id].input;
183
+ default: /* no value is returned on this path; relies on evoasm_assert_not_reached() */
184
+ evoasm_assert_not_reached();
185
+ }
186
+ }
187
+ /* Returns the output flag recorded for reg_id in kernel kernel_idx (x64 only). */
188
+ bool
189
+ evoasm_program_is_output_reg(evoasm_program_t *program, size_t kernel_idx, evoasm_reg_id_t reg_id) {
190
+ evoasm_kernel_t *kernel = &program->kernels[kernel_idx];
191
+ switch(program->arch_info->id) {
192
+ case EVOASM_ARCH_X64:
193
+ return kernel->reg_info.x64.regs[reg_id].output;
194
+ default: /* no value is returned on this path; relies on evoasm_assert_not_reached() */
195
+ evoasm_assert_not_reached();
196
+ }
197
+ }
198
+ /* Scratch register the emitters in this file load absolute addresses into before a memory access. */
199
+ #define EVOASM_PROGRAM_TMP_REG_X64 EVOASM_X64_REG_14
200
+ /* Emits code that zeroes RFLAGS: pushfq, a store of 0 through the stack pointer, then popfq. Returns false via enc_failed (presumably the jump target of EVOASM_X64_ENC on failure). */
201
+ static evoasm_success_t
202
+ evoasm_program_x64_emit_rflags_reset(evoasm_program_t *program) {
203
+ evoasm_x64_params_t params = {0};
204
+ evoasm_buf_t *buf = program->buf; /* not named again below; presumably used by the EVOASM_X64_ENC macro */
205
+
206
+ evoasm_log_debug("emitting RFLAGS reset");
207
+ EVOASM_X64_ENC(pushfq);
208
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_X64_REG_SP);
209
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, 0);
210
+ EVOASM_X64_ENC(mov_rm64_imm32);
211
+ EVOASM_X64_ENC(popfq);
212
+
213
+ return true;
214
+ enc_failed:
215
+ return false;
216
+ }
217
+ /* Emits code that reloads MXCSR with 0x1f80: the address of a static holding that value goes into the scratch register and ldmxcsr reads through it. Returns false if encoding fails. */
218
+ static evoasm_used evoasm_success_t
219
+ evoasm_program_x64_emit_mxcsr_reset(evoasm_program_t *program) {
220
+ static uint32_t default_mxcsr_val = 0x1f80;
221
+ evoasm_x64_params_t params = {0};
222
+ evoasm_buf_t *buf = program->buf;
223
+
224
+ evoasm_param_val_t addr_imm = (evoasm_param_val_t) (uintptr_t) &default_mxcsr_val;
225
+ evoasm_x64_reg_id_t reg_tmp0 = EVOASM_PROGRAM_TMP_REG_X64;
226
+
227
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, reg_tmp0);
228
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, addr_imm);
229
+ EVOASM_X64_ENC(mov_r64_imm64); /* a 32-bit immediate would truncate the address; same form as evoasm_program_x64_emit_output_store() */
230
+
231
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, reg_tmp0);
232
+ EVOASM_X64_ENC(ldmxcsr_m32);
233
+
234
+ return true;
235
+ enc_failed:
236
+ return false;
237
+ }
238
+
239
+ /* Emits, for every output register of the last kernel, a store of that register into program->output_vals[tuple_idx * n_output_regs + i]. Returns false if encoding fails. */
240
+ static evoasm_success_t
241
+ evoasm_program_x64_emit_output_store(evoasm_program_t *program,
242
+ size_t tuple_idx) {
243
+
244
+ evoasm_x64_params_t params = {0};
245
+ evoasm_kernel_t *kernel = &program->kernels[program->size - 1];
246
+ evoasm_buf_t *buf = program->buf;
247
+
248
+ for(size_t i = 0; i < kernel->n_output_regs; i++) {
249
+ evoasm_x64_reg_id_t reg_id = kernel->output_regs.x64[i];
250
+ evoasm_program_io_val_t *val_addr = &program->output_vals[(tuple_idx * kernel->n_output_regs) + i];
251
+ evoasm_x64_reg_type_t reg_type = evoasm_x64_get_reg_type(reg_id);
252
+
253
+ evoasm_param_val_t addr_imm = (evoasm_param_val_t) (uintptr_t) val_addr;
254
+
255
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64); /* scratch register := absolute address of the slot */
256
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, addr_imm);
257
+ EVOASM_X64_ENC(mov_r64_imm64);
258
+
259
+ switch(reg_type) { /* pick the store instruction matching the register class */
260
+ case EVOASM_X64_REG_TYPE_GP: {
261
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, reg_id);
262
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
263
+ EVOASM_X64_ENC(mov_rm64_r64);
264
+ break;
265
+ }
266
+ case EVOASM_X64_REG_TYPE_XMM: {
267
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, reg_id);
268
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
269
+ EVOASM_X64_ENC(movsd_xmmm64_xmm);
270
+ break;
271
+ }
272
+ default: {
273
+ evoasm_assert_not_reached();
274
+ }
275
+ }
276
+ }
277
+
278
+ return true;
279
+
280
+ enc_failed:
281
+ return false;
282
+ }
283
+ /* Resolves the register an operand refers to: from the instruction's parameters when the operand is parameter-backed, otherwise from the operand's fixed reg_id. */
284
+ static evoasm_x64_reg_id_t
285
+ evoasm_kernel_get_operand_reg_id_x64(evoasm_kernel_t *kernel, evoasm_x64_operand_t *op, size_t inst_idx) {
286
+ evoasm_x64_inst_t *inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[inst_idx]);
287
+
288
+ if(op->param_idx < inst->n_params) { /* parameter-backed operand: read the value stored for this instruction */
289
+ return (evoasm_x64_reg_id_t) evoasm_x64_basic_params_get_(&kernel->params.x64[inst_idx],
290
+ (evoasm_x64_basic_param_id_t) inst->params[op->param_idx].id);
291
+ } else if(op->reg_id < EVOASM_X64_REG_NONE) { /* operand names a fixed register */
292
+ return (evoasm_x64_reg_id_t) op->reg_id;
293
+ } else {
294
+ evoasm_assert_not_reached();
295
+ return EVOASM_X64_REG_NONE;
296
+ }
297
+ }
298
+ /* Per-register 512-bit mask accumulating the bit ranges covered by writes (see evoasm_x64_reg_liveness_update). */
299
+ typedef struct {
300
+ evoasm_bitmap512_t mask;
301
+ } evoasm_x64_reg_liveness_t;
302
+ /* ORs into mask512 the bit range that operand op covers within its register, selected by op->word. The middle argument of evoasm_bitmap_or64 is presumably the 64-bit word index - confirm in evoasm-bitmap.h. */
303
+ static void
304
+ evoasm_x64_reg_liveness_or_mask(evoasm_x64_inst_t *inst, evoasm_x64_operand_t *op, evoasm_x64_basic_params_t *params,
305
+ evoasm_bitmap512_t *mask512) {
306
+ evoasm_bitmap_t *mask = (evoasm_bitmap_t *) mask512;
307
+ switch(op->word) {
308
+ case EVOASM_X64_OPERAND_WORD_LB: /* low byte, unless an explicit REG0/REG1 parameter has its high-byte flag set */
309
+ if(!op->implicit && op->param_idx < inst->n_params &&
310
+ (
311
+ (inst->params[op->param_idx].id == EVOASM_X64_BASIC_PARAM_REG0 && params->reg0_high_byte)
312
+ ||
313
+ (inst->params[op->param_idx].id == EVOASM_X64_BASIC_PARAM_REG1 && params->reg1_high_byte)
314
+ )) {
315
+ goto hb;
316
+ }
317
+ evoasm_bitmap_or64(mask, 0, 0x00ffu);
318
+ break;
319
+ case EVOASM_X64_OPERAND_WORD_HB: {
320
+ hb:
321
+ evoasm_bitmap_or64(mask, 0, 0xff00u);
322
+ break;
323
+ }
324
+ case EVOASM_X64_OPERAND_WORD_W:
325
+ evoasm_bitmap_or64(mask, 0, 0xffffu);
326
+ break;
327
+ case EVOASM_X64_OPERAND_WORD_DW:
328
+ /* 32bit writes clear the whole register */
329
+ if(op->reg_type == EVOASM_X64_REG_TYPE_GP) {
330
+ evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
331
+ } else {
332
+ /* xmm[0..31] does this for example */
333
+ evoasm_bitmap_or64(mask, 0, 0xffffffffu);
334
+ }
335
+ break;
336
+ case EVOASM_X64_OPERAND_WORD_LQW:
337
+ evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
338
+ break;
339
+ case EVOASM_X64_OPERAND_WORD_HQW:
340
+ evoasm_bitmap_or64(mask, 1, 0xffffffffffffffffull);
341
+ break;
342
+ case EVOASM_X64_OPERAND_WORD_DQW:
343
+ evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
344
+ evoasm_bitmap_or64(mask, 1, 0xffffffffffffffffull);
345
+ break;
346
+ case EVOASM_X64_OPERAND_WORD_VW: /* four full 64-bit words */
347
+ evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
348
+ evoasm_bitmap_or64(mask, 1, 0xffffffffffffffffull);
349
+ evoasm_bitmap_or64(mask, 2, 0xffffffffffffffffull);
350
+ evoasm_bitmap_or64(mask, 3, 0xffffffffffffffffull);
351
+ break;
352
+ default:
353
+ evoasm_assert_not_reached();
354
+ }
355
+ }
356
+ /* Adds the bit range covered by operand op to reg_liveness->mask. */
357
+ static void
358
+ evoasm_x64_reg_liveness_update(evoasm_x64_reg_liveness_t *reg_liveness, evoasm_x64_inst_t *inst,
359
+ evoasm_x64_operand_t *op, evoasm_x64_basic_params_t *params) {
360
+ evoasm_x64_reg_liveness_or_mask(inst, op, params, &reg_liveness->mask);
361
+ }
362
+
363
+ /* Returns true when the and-not of mask and the liveness mask is non-zero; presumably that means mask has bits the liveness mask lacks - confirm operand order of evoasm_bitmap512_andn. mask is overwritten. */
364
+ static bool
365
+ evoasm_x64_reg_liveness_is_dirty_read_(evoasm_x64_reg_liveness_t *reg_liveness,
366
+ evoasm_bitmap512_t *mask) {
367
+
368
+ evoasm_bitmap512_andn(mask, &reg_liveness->mask, mask);
369
+ return !evoasm_bitmap512_is_zero(mask);
370
+ }
371
+ /* Builds the bit mask covered by operand op and tests it against reg_liveness via evoasm_x64_reg_liveness_is_dirty_read_(). */
372
+ static bool
373
+ evoasm_x64_reg_liveness_is_dirty_read(evoasm_x64_reg_liveness_t *reg_liveness, evoasm_x64_inst_t *inst,
374
+ evoasm_x64_operand_t *op,
375
+ evoasm_x64_basic_params_t *params) {
376
+
377
+ evoasm_bitmap512_t mask = {0};
378
+ evoasm_x64_reg_liveness_or_mask(inst, op, params, &mask);
379
+
380
+ return evoasm_x64_reg_liveness_is_dirty_read_(reg_liveness, &mask);
381
+ }
382
+
383
+ /* Returns true if instruction inst_idx has a written operand that resolves to reg_id and passes the dirty-read test against reg_liveness; that operand's range is then merged into reg_liveness. */
384
+ static bool
385
+ evoasm_kernel_is_writing_inst_x64(evoasm_kernel_t *kernel, size_t inst_idx, evoasm_reg_id_t reg_id,
386
+ evoasm_x64_reg_liveness_t *reg_liveness) {
387
+ evoasm_x64_inst_t *x64_inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[inst_idx]);
388
+
389
+ for(size_t i = 0; i < x64_inst->n_operands; i++) {
390
+ evoasm_x64_operand_t *op = &x64_inst->operands[i];
391
+ evoasm_x64_reg_id_t op_reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, inst_idx);
392
+ evoasm_x64_basic_params_t *x64_basic_params = &kernel->params.x64[inst_idx];
393
+
394
+ if(op->written && op_reg_id == reg_id &&
395
+ evoasm_x64_reg_liveness_is_dirty_read(reg_liveness, x64_inst, op, x64_basic_params)) {
396
+ evoasm_x64_reg_liveness_update(reg_liveness, x64_inst, op, x64_basic_params); /* only the first matching operand is recorded before returning */
397
+ return true;
398
+ }
399
+ }
400
+ return false;
401
+ }
402
+
403
+ /* Resets reg_liveness by assigning a zero-initialized static instance. */
404
+ static void
405
+ evoasm_x64_reg_liveness_init(evoasm_x64_reg_liveness_t *reg_liveness) {
406
+ static evoasm_x64_reg_liveness_t zero_reg_liveness = {0};
407
+ *reg_liveness = zero_reg_liveness;
408
+ }
409
+
410
+ static evoasm_success_t
411
+ evoasm_program_x64_prepare_kernel(evoasm_program_t *program, evoasm_kernel_t *kernel) {
412
+ /* NOTE: output registers are registers that are written to;
413
+ * input registers are registers that are read from without
414
+ * a previous write
415
+ */
416
+ evoasm_x64_reg_liveness_t reg_livenesses[EVOASM_X64_REG_NONE];
417
+ for(int i = 0; i < EVOASM_X64_REG_NONE; i++) {
418
+ evoasm_x64_reg_liveness_init(&reg_livenesses[i]);
419
+ }
420
+
421
+ kernel->n_input_regs = 0;
422
+ kernel->n_output_regs = 0;
423
+
424
+ static evoasm_kernel_reg_info_t zero_reg_info = {0};
425
+ kernel->reg_info = zero_reg_info;
426
+
427
+ /* First, handle read ops, so that writing ops do not disturb us */
428
+ for(size_t i = 0; i < kernel->size; i++) {
429
+ evoasm_x64_inst_t *x64_inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[i]);
430
+ evoasm_x64_basic_params_t *x64_basic_params = &kernel->params.x64[i];
431
+
432
+ for(size_t j = 0; j < x64_inst->n_operands; j++) {
433
+ evoasm_x64_operand_t *op = &x64_inst->operands[j];
434
+
435
+ if((op->read || op->maybe_written) &&
436
+ (op->type == EVOASM_X64_OPERAND_TYPE_REG || op->type == EVOASM_X64_OPERAND_TYPE_RM)) {
437
+
438
+ if(op->reg_type == EVOASM_X64_REG_TYPE_RFLAGS) {
439
+ program->reset_rflags = true;
440
+ } else {
441
+ evoasm_x64_reg_id_t reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, (uint16_t) i);
442
+ evoasm_kernel_x64_reg_info_reg_t *reg_info = &kernel->reg_info.x64.regs[reg_id];
443
+ evoasm_x64_reg_liveness_t *reg_liveness = &reg_livenesses[reg_id];
444
+
445
+ if(!reg_info->input) {
446
+ // has not been written before, might contain garbage
447
+ bool dirty_read;
448
+
449
+ /* the writer rank check is needed for the following case
450
+ * inst regX (operand1, written), regX (operand2, read)
451
+ *
452
+ * The first operand marks regX as written. The read in the second
453
+ * operand, however, is dirty, since the write has not yet occurred at this point.
454
+ */
455
+
456
+ if(reg_info->written) {
457
+ dirty_read = evoasm_x64_reg_liveness_is_dirty_read(reg_liveness, x64_inst, op, x64_basic_params);
458
+ } else {
459
+ dirty_read = true;
460
+ }
461
+
462
+ if(dirty_read) {
463
+ reg_info->input = true;
464
+ kernel->n_input_regs++;
465
+ }
466
+ }
467
+ }
468
+ }
469
+ }
470
+
471
+ for(size_t j = 0; j < x64_inst->n_operands; j++) {
472
+ evoasm_x64_operand_t *op = &x64_inst->operands[j];
473
+
474
+ if(op->written && (op->type == EVOASM_X64_OPERAND_TYPE_REG || op->type == EVOASM_X64_OPERAND_TYPE_RM)) {
475
+
476
+ if(op->reg_type == EVOASM_X64_REG_TYPE_RFLAGS) {
477
+ kernel->reg_info.x64.written_flags =
478
+ (kernel->reg_info.x64.written_flags | op->written_flags) & EVOASM_X64_RFLAGS_FLAGS_BITSIZE;
479
+ kernel->reg_info.x64.regs[EVOASM_X64_REG_RFLAGS].written = true;
480
+ } else {
481
+ evoasm_x64_reg_id_t reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, (uint16_t) i);
482
+ evoasm_kernel_x64_reg_info_reg_t *reg_info = &kernel->reg_info.x64.regs[reg_id];
483
+ evoasm_x64_reg_liveness_t *reg_liveness = &reg_livenesses[reg_id];
484
+
485
+ if(!reg_info->written) {
486
+ reg_info->written = true;
487
+ reg_info->output = true;
488
+ kernel->output_regs.x64[kernel->n_output_regs] = reg_id;
489
+ kernel->n_output_regs++;
490
+ }
491
+
492
+ evoasm_x64_reg_liveness_update(reg_liveness, x64_inst, op, x64_basic_params);
493
+ }
494
+ }
495
+ }
496
+ }
497
+
498
+ for(int i = 0; i < kernel->n_output_regs; i++) {
499
+ evoasm_x64_reg_id_t reg_id = kernel->output_regs.x64[i];
500
+ evoasm_kernel_x64_reg_info_reg_t *reg_info = &kernel->reg_info.x64.regs[reg_id];
501
+ if(!reg_info->input) {
502
+ evoasm_x64_reg_liveness_t *reg_liveness = &reg_livenesses[reg_id];
503
+
504
+ evoasm_bitmap512_t mask = {0};
505
+
506
+ switch(evoasm_x64_get_reg_type(reg_id)) {
507
+ case EVOASM_X64_REG_TYPE_GP:
508
+ evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 0, 0xffffffffffffffffull);
509
+ break;
510
+ case EVOASM_X64_REG_TYPE_XMM:
511
+ case EVOASM_X64_REG_TYPE_ZMM:
512
+ evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 0, 0xffffffffffffffffull);
513
+ evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 1, 0xffffffffffffffffull);
514
+ evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 2, 0xffffffffffffffffull);
515
+ evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 3, 0xffffffffffffffffull);
516
+ break;
517
+ default:
518
+ evoasm_assert_not_reached();
519
+ }
520
+
521
+ bool dirty_read = evoasm_x64_reg_liveness_is_dirty_read_(reg_liveness, &mask);
522
+ if(dirty_read) {
523
+ reg_info->input = true;
524
+ kernel->n_input_regs++;
525
+ }
526
+ }
527
+ }
528
+
529
+ #ifdef EVOASM_ENABLE_PARANOID_MODE
530
+ {
531
+ size_t n_input_regs = 0;
532
+ for(evoasm_x64_reg_id_t i = (evoasm_x64_reg_id_t) 0; i < EVOASM_X64_REG_NONE; i++) {
533
+ if(kernel->reg_info.x64.regs[i].input) n_input_regs++;
534
+ }
535
+ assert(n_input_regs == kernel->n_input_regs);
536
+ }
537
+ #endif
538
+
539
+ assert(kernel->n_output_regs <= EVOASM_KERNEL_MAX_OUTPUT_REGS);
540
+ assert(kernel->n_input_regs <= EVOASM_KERNEL_MAX_INPUT_REGS);
541
+
542
+ if(kernel->n_output_regs == 0) {
543
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_PROGRAM_ERROR_CODE_NO_OUTPUT, NULL);
544
+ return false;
545
+ }
546
+
547
+ return true;
548
+ }
549
+
550
+ static evoasm_success_t
551
+ evoasm_program_x64_prepare(evoasm_program_t *program) {
552
+ for(size_t i = 0; i < program->size; i++) {
553
+ evoasm_kernel_t *kernel = &program->kernels[i];
554
+ EVOASM_TRY(error, evoasm_program_x64_prepare_kernel, program, kernel);
555
+ }
556
+
557
+ return true;
558
+
559
+ error:
560
+ return false;
561
+
562
+ }
563
+
564
+
565
+ static evoasm_success_t
566
+ evoasm_program_x64_emit_input_reg_load(evoasm_x64_reg_id_t input_reg_id,
567
+ evoasm_buf_t *buf,
568
+ evoasm_program_io_val_t *tuple,
569
+ evoasm_program_io_val_t *loaded_tuple,
570
+ bool force_load) {
571
+
572
+ evoasm_x64_reg_type_t reg_type = evoasm_x64_get_reg_type(input_reg_id);
573
+ evoasm_x64_params_t params = {0};
574
+
575
+ evoasm_log_debug("emitting _input register initialization of register %d to value %"
576
+ PRId64, input_reg_id, tuple->i64);
577
+
578
+ switch(reg_type) {
579
+ case EVOASM_X64_REG_TYPE_GP: {
580
+ if(force_load) {
581
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64);
582
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) (uintptr_t) &tuple->i64);
583
+ EVOASM_X64_ENC(mov_r64_imm64);
584
+
585
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
586
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
587
+ EVOASM_X64_ENC(mov_r64_rm64);
588
+ } else {
589
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
590
+ /*FIXME: hard-coded tuple type */
591
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) tuple->i64);
592
+ EVOASM_X64_ENC(mov_r64_imm64);
593
+ }
594
+ break;
595
+ }
596
+ case EVOASM_X64_REG_TYPE_XMM: {
597
+ /* load address of tuple into tmp_reg */
598
+ if(loaded_tuple != tuple) {
599
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64);
600
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) (uintptr_t) &tuple->f64);
601
+ EVOASM_X64_ENC(mov_r64_imm64);
602
+ loaded_tuple = tuple;
603
+ }
604
+
605
+ /* load into xmm via address in tmp_reg */
606
+ /*FIXME: hard-coded tuple type */
607
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
608
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
609
+ EVOASM_X64_ENC(movsd_xmm_xmmm64);
610
+ break;
611
+ }
612
+ default:
613
+ evoasm_log_fatal("non-gpr register type (%d) (unimplemented)", reg_type);
614
+ evoasm_assert_not_reached();
615
+ }
616
+
617
+ return true;
618
+
619
+ enc_failed:
620
+ return false;
621
+ }
622
+
623
+
624
+ static evoasm_success_t
625
+ evoasm_program_x64_emit_input_load(evoasm_program_t *program,
626
+ evoasm_program_io_val_t *input_vals,
627
+ evoasm_program_io_val_type_t *types,
628
+ size_t in_arity,
629
+ bool set_io_mapping) {
630
+
631
+
632
+ evoasm_program_io_val_t *loaded_tuple = NULL;
633
+ evoasm_buf_t *buf = program->buf;
634
+ evoasm_kernel_t *kernel = &program->kernels[0];
635
+
636
+ evoasm_log_debug("n _input regs %d", kernel->n_input_regs);
637
+ #if 0
638
+ for(input_reg_id = (evoasm_x64_reg_id_t) 13; input_reg_id < 19; input_reg_id++) {
639
+ if(input_reg_id == EVOASM_X64_REG_SP) continue;
640
+ evoasm_x64_params_t params = {0};
641
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
642
+ /*FIXME: hard-coded tuple type */
643
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, 0);
644
+ EVOASM_X64_ENC(mov_r64_imm64);
645
+ }
646
+ #endif
647
+
648
+ {
649
+ size_t input_reg_idx = 0;
650
+ for(evoasm_x64_reg_id_t input_reg = (evoasm_x64_reg_id_t) 0; input_reg < EVOASM_X64_REG_NONE; input_reg++) {
651
+ if(!kernel->reg_info.x64.regs[input_reg].input) continue;
652
+
653
+ size_t tuple_idx;
654
+
655
+ if(set_io_mapping) {
656
+ tuple_idx = input_reg_idx++ % in_arity;
657
+ program->reg_inputs.x64[input_reg] = (uint8_t) tuple_idx;
658
+ } else {
659
+ tuple_idx = program->reg_inputs.x64[input_reg];
660
+ }
661
+
662
+ evoasm_program_io_val_t *tuple = &input_vals[tuple_idx];
663
+ EVOASM_TRY(error, evoasm_program_x64_emit_input_reg_load, input_reg, buf, tuple, loaded_tuple, false);
664
+ }
665
+ }
666
+
667
+ #ifdef EVOASM_ENABLE_PARANOID_MODE
668
+ EVOASM_TRY(error, evoasm_x64_emit_push, EVOASM_PROGRAM_TMP_REG_X64, buf);
669
+ for(evoasm_x64_reg_id_t non_input_reg = (evoasm_x64_reg_id_t) EVOASM_X64_REG_A;
670
+ non_input_reg < EVOASM_X64_REG_15; non_input_reg++) {
671
+ if(kernel->reg_info.x64.regs[non_input_reg].input) continue;
672
+ if(non_input_reg == EVOASM_X64_REG_SP) continue;
673
+
674
+ evoasm_program_io_val_t *tuple = &kernel->rand_vals[non_input_reg];
675
+ EVOASM_TRY(error, evoasm_program_x64_emit_input_reg_load, non_input_reg, buf, tuple, NULL, true);
676
+ }
677
+ EVOASM_TRY(error, evoasm_x64_emit_pop, EVOASM_PROGRAM_TMP_REG_X64, buf);
678
+ #endif
679
+
680
+ if(program->reset_rflags) {
681
+ EVOASM_TRY(error, evoasm_program_x64_emit_rflags_reset, program);
682
+ }
683
+ return true;
684
+
685
+ error:
686
+ return false;
687
+ }
688
+
689
+ static evoasm_success_t
690
+ evoasm_program_x64_emit_kernel_transition(evoasm_program_t *program,
691
+ evoasm_kernel_t *from_kernel,
692
+ evoasm_kernel_t *to_kernel,
693
+ evoasm_buf_t *buf,
694
+ size_t trans_idx,
695
+ bool set_io_mapping) {
696
+ size_t input_reg_idx;
697
+ evoasm_x64_reg_id_t input_reg_id;
698
+
699
+ assert(from_kernel->n_output_regs > 0);
700
+
701
+ for(input_reg_id = (evoasm_x64_reg_id_t) 0, input_reg_idx = 0; input_reg_id < EVOASM_X64_REG_NONE; input_reg_id++) {
702
+ if(!to_kernel->reg_info.x64.regs[input_reg_id].input) continue;
703
+
704
+ evoasm_x64_reg_id_t output_reg_id;
705
+
706
+ if(set_io_mapping) {
707
+ size_t output_reg_idx = input_reg_idx % from_kernel->n_output_regs;
708
+ output_reg_id = from_kernel->output_regs.x64[output_reg_idx];
709
+
710
+ from_kernel->reg_info.x64.trans_regs[trans_idx][input_reg_id] = output_reg_id;
711
+ } else {
712
+ output_reg_id = from_kernel->reg_info.x64.trans_regs[trans_idx][input_reg_id];
713
+ }
714
+
715
+ evoasm_x64_reg_type_t output_reg_type = evoasm_x64_get_reg_type(output_reg_id);
716
+ evoasm_x64_reg_type_t input_reg_type = evoasm_x64_get_reg_type(input_reg_id);
717
+ evoasm_x64_params_t params = {0};
718
+
719
+ if(input_reg_id != output_reg_id) {
720
+ if(output_reg_type == EVOASM_X64_REG_TYPE_GP &&
721
+ input_reg_type == EVOASM_X64_REG_TYPE_GP) {
722
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
723
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
724
+ EVOASM_X64_ENC(mov_r64_rm64);
725
+ } else if(output_reg_type == EVOASM_X64_REG_TYPE_XMM &&
726
+ input_reg_type == EVOASM_X64_REG_TYPE_XMM) {
727
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
728
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
729
+ if(program->arch_info->features & EVOASM_X64_FEATURE_AVX) {
730
+ EVOASM_X64_ENC(vmovdqa_ymm_ymmm256);
731
+ } else {
732
+ EVOASM_X64_ENC(movdqa_xmm_xmmm128);
733
+ }
734
+ } else if(output_reg_type == EVOASM_X64_REG_TYPE_GP &&
735
+ input_reg_type == EVOASM_X64_REG_TYPE_XMM) {
736
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
737
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
738
+ if(program->arch_info->features & EVOASM_X64_FEATURE_AVX) {
739
+ EVOASM_X64_ENC(vmovq_xmm_rm64);
740
+ } else {
741
+ EVOASM_X64_ENC(movq_xmm_rm64);
742
+ }
743
+ } else if(output_reg_type == EVOASM_X64_REG_TYPE_XMM &&
744
+ input_reg_type == EVOASM_X64_REG_TYPE_GP) {
745
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
746
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
747
+ if(program->arch_info->features & EVOASM_X64_FEATURE_AVX) {
748
+ EVOASM_X64_ENC(vmovq_rm64_xmm);
749
+ } else {
750
+ EVOASM_X64_ENC(movq_rm64_xmm);
751
+ }
752
+ } else {
753
+ evoasm_assert_not_reached();
754
+ }
755
+ }
756
+ input_reg_idx++;
757
+ }
758
+
759
+ return true;
760
+
761
+ enc_failed:
762
+ return false;
763
+ }
764
+
765
/*
 * Helpers for patching 32-bit relative jump displacements ("phis").
 *
 * EVOASM_BUF_PHI_GET(buf)       address of the 4 bytes that precede the
 *                               buffer's current position, i.e. the rel32
 *                               field of a jump that was just emitted.
 * EVOASM_BUF_PHI_SET(label, val) stores into *label the displacement from
 *                               the end of the 4-byte field to address val.
 * EVOASM_BUF_POS_ADDR(buf)      address of the buffer's current position.
 *
 * EVOASM_BUF_PHI_SET deliberately has no trailing semicolon, so that it
 * behaves like a single statement (also in an unbraced if/else); all macro
 * arguments are parenthesized.
 */
#define EVOASM_BUF_PHI_GET(buf) ((uint32_t *)((buf)->data + (buf)->pos - 4))
#define EVOASM_BUF_PHI_SET(label, val) \
do { (*(label) = (uint32_t)((uint8_t *)(val) - ((uint8_t *)(label) + 4))); } while(0)
#define EVOASM_BUF_POS_ADDR(buf) ((buf)->data + (buf)->pos)

/* Number of conditional jump instructions a kernel transition can choose from. */
#define EVOASM_PROGRAM_X64_N_JMP_INSTS 16
771
+
772
+ static evoasm_success_t
773
+ evoasm_program_x64_emit_kernel_transitions(evoasm_program_t *program,
774
+ evoasm_kernel_t *kernel,
775
+ evoasm_kernel_t *next_kernel,
776
+ evoasm_kernel_t *branch_kernel,
777
+ evoasm_buf_t *buf,
778
+ uint32_t **branch_kernel_phi,
779
+ bool set_io_mapping) {
780
+
781
+ static const evoasm_x64_inst_id_t jmp_insts[] = {
782
+ EVOASM_X64_INST_JA_REL32, // 0
783
+ EVOASM_X64_INST_JAE_REL32, // 1
784
+ EVOASM_X64_INST_JB_REL32, // 2
785
+ EVOASM_X64_INST_JBE_REL32, // 3
786
+ EVOASM_X64_INST_JE_REL32, // 4
787
+ EVOASM_X64_INST_JG_REL32, // 5
788
+ EVOASM_X64_INST_JGE_REL32, // 6
789
+ EVOASM_X64_INST_JL_REL32, // 7
790
+ EVOASM_X64_INST_JLE_REL32, // 8
791
+ EVOASM_X64_INST_JNE_REL32, // 9
792
+ EVOASM_X64_INST_JNO_REL32, // 10
793
+ EVOASM_X64_INST_JNP_REL32, // 11
794
+ EVOASM_X64_INST_JNS_REL32, // 12
795
+ EVOASM_X64_INST_JO_REL32, // 13
796
+ EVOASM_X64_INST_JP_REL32, // 14
797
+ EVOASM_X64_INST_JS_REL32, // 15
798
+ };
799
+
800
+ evoasm_x64_params_t params = {0};
801
+ uint32_t *branch_phi = NULL;
802
+ uint32_t *counter_phi = NULL;
803
+
804
+ if(program->recur_limit == 0) goto next_transition;
805
+
806
+ evoasm_inst_id_t jmp_inst_id = jmp_insts[program->jmp_conds[kernel->idx] % EVOASM_PROGRAM_X64_N_JMP_INSTS];
807
+
808
+ if(kernel->reg_info.x64.regs[EVOASM_X64_REG_RFLAGS].written) {
809
+ if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_OF)) {
810
+ if(jmp_inst_id == EVOASM_X64_INST_JO_REL32 || jmp_inst_id == EVOASM_X64_INST_JNO_REL32) goto branch_transition;
811
+ }
812
+
813
+ if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_SF)) {
814
+ if(jmp_inst_id == EVOASM_X64_INST_JS_REL32 || jmp_inst_id == EVOASM_X64_INST_JNS_REL32) goto branch_transition;
815
+ }
816
+
817
+ if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_ZF)) {
818
+ if(jmp_inst_id == EVOASM_X64_INST_JE_REL32 ||
819
+ jmp_inst_id == EVOASM_X64_INST_JNE_REL32 ||
820
+ jmp_inst_id == EVOASM_X64_INST_JBE_REL32 ||
821
+ jmp_inst_id == EVOASM_X64_INST_JLE_REL32) {
822
+ goto branch_transition;
823
+ }
824
+ }
825
+
826
+ if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_CF)) {
827
+ if(jmp_inst_id == EVOASM_X64_INST_JB_REL32 ||
828
+ jmp_inst_id == EVOASM_X64_INST_JAE_REL32 ||
829
+ jmp_inst_id == EVOASM_X64_INST_JBE_REL32) {
830
+ goto branch_transition;
831
+ }
832
+ }
833
+
834
+ if((EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_ZF)) &&
835
+ (EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_CF))) {
836
+ if(jmp_inst_id == EVOASM_X64_INST_JA_REL32) goto branch_transition;
837
+ }
838
+
839
+ if((EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_SF)) &&
840
+ (EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_OF))) {
841
+
842
+ if(jmp_inst_id == EVOASM_X64_INST_JL_REL32 ||
843
+ jmp_inst_id == EVOASM_X64_INST_JGE_REL32 ||
844
+ jmp_inst_id == EVOASM_X64_INST_JLE_REL32 ||
845
+ ((EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_ZF)) &&
846
+ jmp_inst_id == EVOASM_X64_INST_JG_REL32)) {
847
+ goto branch_transition;
848
+ }
849
+ }
850
+
851
+ if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_PF)) {
852
+ if(jmp_inst_id == EVOASM_X64_INST_JP_REL32 || jmp_inst_id == EVOASM_X64_INST_JNP_REL32) goto branch_transition;
853
+ }
854
+ }
855
+ /* kernel does not write to required jump flag, ignore jmp_off and emit next kernel */
856
+ goto next_transition;
857
+
858
+ #if 0
859
+ /*FIXME: only 8bit possible, check and activate if feasable*/
860
+ if(kernel->reg_info.x64.regs[EVOASM_X64_REG_C].written) {
861
+ jmp_insts[possible_jmp_insts_len++] = EVOASM_X64_INST_JECXZ_JRCXZ_REL8;
862
+ }
863
+ #endif
864
+
865
+ branch_transition:
866
+ {
867
+ evoasm_buf_ref_t buf_ref = {
868
+ .data = buf->data,
869
+ .pos = &buf->pos
870
+ };
871
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
872
+ EVOASM_TRY(error, evoasm_x64_enc_, (evoasm_x64_inst_id_t) jmp_inst_id, &params, &buf_ref);
873
+ branch_phi = EVOASM_BUF_PHI_GET(buf);
874
+ assert(*branch_phi == 0xdeadbeef);
875
+
876
+ if(branch_kernel->idx <= kernel->idx) {
877
+ /* back jump, guard with counter */
878
+
879
+ uint32_t *counter = &program->recur_counters[kernel->idx];
880
+ uintptr_t addr_imm = (uintptr_t) counter;
881
+
882
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64);
883
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) addr_imm);
884
+ EVOASM_X64_ENC(mov_r64_imm64);
885
+
886
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
887
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, program->recur_limit);
888
+ EVOASM_X64_ENC(cmp_rm32_imm32);
889
+
890
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
891
+ EVOASM_X64_ENC(jge_rel32);
892
+
893
+ counter_phi = EVOASM_BUF_PHI_GET(buf);
894
+ assert(*counter_phi == 0xdeadbeef);
895
+
896
+ EVOASM_X64_ENC(inc_rm32);
897
+ }
898
+
899
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transition, program,
900
+ kernel, branch_kernel, buf, 1, set_io_mapping);
901
+
902
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
903
+ EVOASM_X64_ENC(jmp_rel32);
904
+
905
+ *branch_kernel_phi = EVOASM_BUF_PHI_GET(buf);
906
+ assert(**branch_kernel_phi == 0xdeadbeef);
907
+
908
+ if(branch_phi != NULL) {
909
+ EVOASM_BUF_PHI_SET(branch_phi, EVOASM_BUF_POS_ADDR(buf));
910
+ }
911
+
912
+ if(counter_phi != NULL) {
913
+ EVOASM_BUF_PHI_SET(counter_phi, EVOASM_BUF_POS_ADDR(buf));
914
+ }
915
+ }
916
+
917
+ next_transition:
918
+ if(next_kernel != NULL) {
919
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transition, program,
920
+ kernel, next_kernel, buf, 0, set_io_mapping);
921
+ }
922
+
923
+ evoasm_buf_log(buf, EVOASM_LOG_LEVEL_DEBUG);
924
+
925
+ return true;
926
+
927
+
928
+ error:
929
+ enc_failed:
930
+ return false;
931
+ }
932
+
933
+
934
+ static evoasm_success_t
935
+ evoasm_program_x64_emit_kernel(evoasm_program_t *program, evoasm_kernel_t *kernel, evoasm_buf_t *buf) {
936
+ evoasm_buf_ref_t buf_ref = {
937
+ .data = buf->data,
938
+ .pos = &buf->pos
939
+ };
940
+
941
+ assert(kernel->size > 0);
942
+ for(size_t i = 0; i < kernel->size; i++) {
943
+ evoasm_x64_inst_t *inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[i]);
944
+ program->exception_mask = program->exception_mask | inst->exceptions;
945
+ EVOASM_TRY(error, evoasm_x64_inst_enc_basic_, inst, &kernel->params.x64[i], &buf_ref);
946
+ }
947
+ return true;
948
+ error:
949
+ return false;
950
+ }
951
+
952
+
953
+ static size_t
954
+ evoasm_program_branch_kernel_idx(evoasm_program_t *program, size_t idx) {
955
+ return (size_t) EVOASM_CLAMP((int) idx + program->jmp_offs[idx], 0, program->size - 1);
956
+ }
957
+
958
+ static evoasm_success_t
959
+ evoasm_program_x64_emit_program_kernels(evoasm_program_t *program, bool set_io_mapping) {
960
+ evoasm_buf_t *buf = program->body_buf;
961
+ evoasm_kernel_t *kernel, *next_kernel, *branch_kernel;
962
+ size_t program_size = program->size;
963
+ uint32_t *branch_phis[EVOASM_PROGRAM_MAX_SIZE] = {0};
964
+ uint8_t *kernel_addrs[EVOASM_PROGRAM_MAX_SIZE];
965
+
966
+ evoasm_buf_reset(buf);
967
+
968
+ assert(program_size > 0);
969
+
970
+ for(size_t i = 0; i < program_size; i++) {
971
+ kernel = &program->kernels[i];
972
+
973
+ kernel_addrs[i] = buf->data + buf->pos;
974
+ kernel->buf_start = (uint16_t) buf->pos;
975
+
976
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel, program, kernel, buf);
977
+
978
+ if(i < program_size - 1) {
979
+ next_kernel = &program->kernels[i + 1];
980
+ } else {
981
+ next_kernel = NULL;
982
+ }
983
+
984
+ size_t branch_kernel_idx = evoasm_program_branch_kernel_idx(program, i);
985
+ assert(branch_kernel_idx < program->size);
986
+ branch_kernel = &program->kernels[branch_kernel_idx];
987
+
988
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transitions, program, kernel,
989
+ next_kernel, branch_kernel, buf, &branch_phis[i], set_io_mapping);
990
+
991
+ kernel->buf_end = (uint16_t) buf->pos;
992
+ }
993
+
994
+ for(size_t i = 0; i < program_size; i++) {
995
+ size_t branch_kernel_idx = evoasm_program_branch_kernel_idx(program, i);
996
+ uint32_t *branch_phi = branch_phis[i];
997
+ if(branch_phi != NULL) {
998
+ uint8_t *branch_kernel_addr = kernel_addrs[branch_kernel_idx];
999
+ assert(*branch_phi == 0xdeadbeef);
1000
+ EVOASM_BUF_PHI_SET(branch_phi, branch_kernel_addr);
1001
+ }
1002
+ }
1003
+
1004
+ return true;
1005
+ error:
1006
+ return false;
1007
+ }
1008
+
1009
+ static evoasm_success_t
1010
+ evoasm_program_x64_emit_io_load_store(evoasm_program_t *program,
1011
+ evoasm_program_input_t *input,
1012
+ bool io_mapping) {
1013
+ size_t n_tuples = EVOASM_PROGRAM_INPUT_N_TUPLES(input);
1014
+
1015
+ evoasm_buf_reset(program->buf);
1016
+ EVOASM_TRY(error, evoasm_x64_emit_func_prolog, EVOASM_X64_ABI_SYSV, program->buf);
1017
+
1018
+ for(size_t i = 0; i < n_tuples; i++) {
1019
+ evoasm_program_io_val_t *input_vals = input->vals + i * input->arity;
1020
+ EVOASM_TRY(error, evoasm_program_x64_emit_input_load, program, input_vals, input->types, input->arity,
1021
+ io_mapping);
1022
+ size_t r = evoasm_buf_append(program->buf, program->body_buf);
1023
+ assert(r == 0);
1024
+ EVOASM_TRY(error, evoasm_program_x64_emit_output_store, program, i);
1025
+ }
1026
+
1027
+ EVOASM_TRY(error, evoasm_x64_emit_func_epilog, EVOASM_X64_ABI_SYSV, program->buf);
1028
+ return true;
1029
+
1030
+ error:
1031
+ return false;
1032
+ }
1033
+
1034
+ static evoasm_success_t
1035
+ evoasm_program_x64_emit(evoasm_program_t *program,
1036
+ evoasm_program_input_t *input,
1037
+ evoasm_program_emit_flags_t emit_flags) {
1038
+
1039
+ bool set_io_mapping = emit_flags & EVOASM_PROGRAM_EMIT_FLAG_SET_IO_MAPPING;
1040
+
1041
+ if(emit_flags & EVOASM_PROGRAM_EMIT_FLAG_PREPARE) {
1042
+ EVOASM_TRY(error, evoasm_program_x64_prepare, program);
1043
+ }
1044
+
1045
+ if(emit_flags & EVOASM_PROGRAM_EMIT_FLAG_EMIT_KERNELS) {
1046
+ EVOASM_TRY(error, evoasm_program_x64_emit_program_kernels, program, set_io_mapping);
1047
+ }
1048
+
1049
+ if(emit_flags & EVOASM_PROGRAM_EMIT_FLAG_EMIT_IO_LOAD_STORE) {
1050
+ EVOASM_TRY(error, evoasm_program_x64_emit_io_load_store, program, input, set_io_mapping);
1051
+ }
1052
+
1053
+ evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_DEBUG);
1054
+
1055
+ return true;
1056
+
1057
+ error:
1058
+ return false;
1059
+ }
1060
+
1061
+
1062
/* Distance metrics available for comparing program output with expected output. */
typedef enum {
  EVOASM_METRIC_ABSDIFF = 0, /* absolute difference */
  EVOASM_METRIC_NONE = 1
} evoasm_metric;
1066
+
1067
+ static inline void
1068
+ evoasm_program_update_dist_mat(evoasm_program_t *program,
1069
+ evoasm_kernel_t *kernel,
1070
+ evoasm_program_output_t *output,
1071
+ size_t height,
1072
+ size_t tuple_idx,
1073
+ double *dist_mat,
1074
+ evoasm_metric metric) {
1075
+ size_t width = kernel->n_output_regs;
1076
+ evoasm_program_io_val_t *io_vals = output->vals + tuple_idx * output->arity;
1077
+
1078
+ for(size_t i = 0; i < height; i++) {
1079
+ evoasm_program_io_val_t io_val = io_vals[i];
1080
+ evoasm_program_io_val_type_t tuple_type = output->types[i];
1081
+ double io_val_dbl = evoasm_program_io_val_to_dbl(io_val, tuple_type);
1082
+
1083
+ for(size_t j = 0; j < width; j++) {
1084
+ evoasm_program_io_val_t output_val = program->output_vals[tuple_idx * width + j];
1085
+ //uint8_t output_size = program->output_sizes[j];
1086
+ //switch(output_size) {
1087
+ //
1088
+ //}
1089
+ // FIXME: output is essentially just a bitstring and could be anything
1090
+ // an integer (both, signed or unsigned) a float or double.
1091
+ // Moreover, a portion of the output value could
1092
+ // hold the correct answer (e.g. lower 8 or 16 bits etc.).
1093
+ // For now we use the tuple output type and assume signedness.
1094
+ // This needs to be fixed.
1095
+ double output_val_dbl = evoasm_program_io_val_to_dbl(output_val, tuple_type);
1096
+
1097
+ switch(metric) {
1098
+ default:
1099
+ case EVOASM_METRIC_ABSDIFF: {
1100
+ double dist = fabs(output_val_dbl - io_val_dbl);
1101
+ dist_mat[i * width + j] += dist;
1102
+ break;
1103
+ }
1104
+ }
1105
+ }
1106
+ }
1107
+ }
1108
+
1109
+ static void
1110
+ evoasm_program_log_program_output(evoasm_program_t *program,
1111
+ evoasm_kernel_t *kernel,
1112
+ evoasm_program_output_t *output,
1113
+ uint_fast8_t *const matching,
1114
+ evoasm_log_level_t log_level) {
1115
+
1116
+ size_t n_tuples = EVOASM_PROGRAM_OUTPUT_N_TUPLES(output);
1117
+ size_t height = output->arity;
1118
+ size_t width = kernel->n_output_regs;
1119
+
1120
+ evoasm_log(log_level, EVOASM_LOG_TAG, "OUTPUT MATRICES:\n");
1121
+
1122
+ for(size_t i = 0; i < width; i++) {
1123
+ evoasm_log(log_level, EVOASM_LOG_TAG, " %d ", kernel->output_regs.x64[i]);
1124
+ }
1125
+
1126
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
1127
+
1128
+ for(size_t i = 0; i < n_tuples; i++) {
1129
+ for(size_t j = 0; j < height; j++) {
1130
+ for(size_t k = 0; k < width; k++) {
1131
+ bool matched = matching[j] == k;
1132
+ evoasm_program_io_val_t val = program->output_vals[i * width + k];
1133
+
1134
+ if(matched) {
1135
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[1m ");
1136
+ }
1137
+ evoasm_log(log_level, EVOASM_LOG_TAG, " %ld (%f)\t ", val.i64, val.f64);
1138
+ if(matched) {
1139
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[0m ");
1140
+ }
1141
+ }
1142
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
1143
+ }
1144
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
1145
+ }
1146
+ }
1147
+
1148
+ static void
1149
+ evoasm_program_log_dist_dist_mat(evoasm_program_t *program,
1150
+ evoasm_kernel_t *kernel,
1151
+ size_t height,
1152
+ double *dist_mat,
1153
+ uint_fast8_t *matching,
1154
+ evoasm_log_level_t log_level) {
1155
+
1156
+ size_t width = kernel->n_output_regs;
1157
+
1158
+ evoasm_log(log_level, EVOASM_LOG_TAG, "DIST MATRIX: (%zu, %zu)\n", height, width);
1159
+ for(size_t i = 0; i < height; i++) {
1160
+ for(size_t j = 0; j < width; j++) {
1161
+ if(matching[i] == j) {
1162
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[1m ");
1163
+ }
1164
+ evoasm_log(log_level, EVOASM_LOG_TAG, " %.2g\t ", dist_mat[i * width + j]);
1165
+ if(matching[i] == j) {
1166
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[0m ");
1167
+ }
1168
+ }
1169
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
1170
+ }
1171
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
1172
+ }
1173
+
1174
+
1175
+ static inline bool
1176
+ evoasm_program_match(evoasm_program_t *program,
1177
+ size_t width,
1178
+ double *dist_mat,
1179
+ uint_fast8_t *matching) {
1180
+
1181
+ uint_fast8_t best_index = UINT_FAST8_MAX;
1182
+ double best_dist = INFINITY;
1183
+ uint_fast8_t i;
1184
+
1185
+ for(i = 0; i < width; i++) {
1186
+ double v = dist_mat[i];
1187
+ if(v < best_dist) {
1188
+ best_dist = v;
1189
+ best_index = i;
1190
+ }
1191
+ }
1192
+
1193
+ if(evoasm_likely(best_index != UINT_FAST8_MAX)) {
1194
+ *matching = best_index;
1195
+ return true;
1196
+ } else {
1197
+ /*evoasm_program_log_dist_dist_mat(program,
1198
+ 1,
1199
+ dist_mat,
1200
+ matching,
1201
+ EVOASM_LOG_LEVEL_WARN);
1202
+ evoasm_assert_not_reached();*/
1203
+ /*
1204
+ * Might happen if all elements are inf or nan
1205
+ */
1206
+ return false;
1207
+ }
1208
+ }
1209
+
1210
+ static inline void
1211
+ evoasm_program_calc_stable_matching(evoasm_program_t *program,
1212
+ evoasm_kernel_t *kernel,
1213
+ size_t height,
1214
+ double *dist_mat,
1215
+ uint_fast8_t *matching) {
1216
+
1217
+ uint_fast8_t width = (uint_fast8_t) kernel->n_output_regs;
1218
+ uint_fast8_t *inv_matching = evoasm_alloca(width * sizeof(uint_fast8_t));
1219
+ uint_fast8_t i;
1220
+
1221
+ // calculates a stable matching
1222
+ for(i = 0; i < height; i++) {
1223
+ matching[i] = UINT_FAST8_MAX;
1224
+ }
1225
+
1226
+ for(i = 0; i < width; i++) {
1227
+ inv_matching[i] = UINT_FAST8_MAX;
1228
+ }
1229
+
1230
+ while(true) {
1231
+ uint_fast8_t unmatched_index = UINT_FAST8_MAX;
1232
+ uint_fast8_t best_index = UINT_FAST8_MAX;
1233
+ double best_dist = INFINITY;
1234
+
1235
+ for(i = 0; i < height; i++) {
1236
+ if(matching[i] == UINT_FAST8_MAX) {
1237
+ unmatched_index = i;
1238
+ break;
1239
+ }
1240
+ }
1241
+
1242
+ if(unmatched_index == UINT_FAST8_MAX) {
1243
+ break;
1244
+ }
1245
+
1246
+ for(i = 0; i < width; i++) {
1247
+ double v = dist_mat[unmatched_index * width + i];
1248
+ if(v < best_dist) {
1249
+ best_dist = v;
1250
+ best_index = i;
1251
+ }
1252
+ }
1253
+
1254
+ if(evoasm_likely(best_index != UINT_FAST8_MAX)) {
1255
+ if(inv_matching[best_index] == UINT_FAST8_MAX) {
1256
+ inv_matching[best_index] = unmatched_index;
1257
+ matching[unmatched_index] = best_index;
1258
+ } else {
1259
+ if(dist_mat[inv_matching[best_index] * width + best_index] > best_dist) {
1260
+ matching[inv_matching[best_index]] = UINT_FAST8_MAX;
1261
+ inv_matching[best_index] = unmatched_index;
1262
+ matching[unmatched_index] = best_index;
1263
+ } else {
1264
+ //dist_mat[unmatched_index * width + i] = copysign(best_dist, -1.0);
1265
+ dist_mat[unmatched_index * width + i] = INFINITY;
1266
+ }
1267
+ }
1268
+ } else {
1269
+ evoasm_program_log_dist_dist_mat(program,
1270
+ kernel,
1271
+ height,
1272
+ dist_mat,
1273
+ matching,
1274
+ EVOASM_LOG_LEVEL_DEBUG);
1275
+ evoasm_assert_not_reached();
1276
+ }
1277
+ }
1278
+ }
1279
+
1280
+
1281
+ static inline evoasm_loss_t
1282
+ evoasm_program_calc_loss(evoasm_program_t *program,
1283
+ evoasm_kernel_t *kernel,
1284
+ size_t height,
1285
+ double *dist_mat,
1286
+ uint_fast8_t *matching) {
1287
+ size_t width = kernel->n_output_regs;
1288
+ double scale = 1.0 / (double) width;
1289
+ double loss = 0.0;
1290
+
1291
+ for(size_t i = 0; i < height; i++) {
1292
+ loss += (scale * dist_mat[i * width + matching[i]]);
1293
+ }
1294
+
1295
+ return (evoasm_loss_t) loss;
1296
+ }
1297
+
1298
+
1299
+ static evoasm_loss_t
1300
+ evoasm_program_assess(evoasm_program_t *program,
1301
+ evoasm_program_output_t *output) {
1302
+
1303
+ size_t n_tuples = EVOASM_PROGRAM_OUTPUT_N_TUPLES(output);
1304
+ size_t height = output->arity;
1305
+ evoasm_kernel_t *kernel = &program->kernels[program->size - 1];
1306
+ size_t width = kernel->n_output_regs;
1307
+ size_t dist_mat_len = (size_t) (width * height);
1308
+ double *dist_mat = evoasm_alloca(dist_mat_len * sizeof(double));
1309
+ uint_fast8_t *matching = evoasm_alloca(height * sizeof(uint_fast8_t));
1310
+ evoasm_loss_t loss;
1311
+
1312
+ for(size_t i = 0; i < dist_mat_len; i++) {
1313
+ dist_mat[i] = 0.0;
1314
+ }
1315
+
1316
+ if(height == 1) {
1317
+ /* COMMON FAST-PATH */
1318
+ for(size_t i = 0; i < n_tuples; i++) {
1319
+ evoasm_program_update_dist_mat(program, kernel, output, 1, i, dist_mat, EVOASM_METRIC_ABSDIFF);
1320
+ }
1321
+
1322
+ if(evoasm_program_match(program, width, dist_mat, matching)) {
1323
+ loss = evoasm_program_calc_loss(program, kernel, 1, dist_mat, matching);
1324
+ } else {
1325
+ loss = INFINITY;
1326
+ }
1327
+ } else {
1328
+ for(size_t i = 0; i < n_tuples; i++) {
1329
+ evoasm_program_update_dist_mat(program, kernel, output, height, i, dist_mat, EVOASM_METRIC_ABSDIFF);
1330
+ }
1331
+
1332
+ evoasm_program_calc_stable_matching(program, kernel, height, dist_mat, matching);
1333
+ loss = evoasm_program_calc_loss(program, kernel, height, dist_mat, matching);
1334
+ }
1335
+
1336
+
1337
+ for(size_t i = 0; i < height; i++) {
1338
+ switch(program->arch_info->id) {
1339
+ case EVOASM_ARCH_X64: {
1340
+ program->output_regs[i] = kernel->output_regs.x64[matching[i]];
1341
+ break;
1342
+ }
1343
+ default:
1344
+ evoasm_assert_not_reached();
1345
+ }
1346
+ }
1347
+
1348
+ #if EVOASM_LOG_LEVEL <= EVOASM_LOG_LEVEL_DEBUG
1349
+ if(loss == 0.0) {
1350
+ evoasm_program_log_program_output(program,
1351
+ kernel,
1352
+ output,
1353
+ matching,
1354
+ EVOASM_LOG_LEVEL_DEBUG);
1355
+ }
1356
+ #endif
1357
+
1358
+ return loss;
1359
+ }
1360
+
1361
+ static void
1362
+ evoasm_program_reset_recur_counters(evoasm_program_t *program) {
1363
+ memset(program->recur_counters, 0, sizeof(program->recur_counters[0]) * program->size);
1364
+ }
1365
+
1366
+ static inline evoasm_loss_t
1367
+ evoasm_program_eval_(evoasm_program_t *program,
1368
+ evoasm_program_output_t *output) {
1369
+
1370
+ evoasm_kernel_t *last_kernel = &program->kernels[program->size - 1];
1371
+ evoasm_loss_t loss;
1372
+
1373
+ if(evoasm_unlikely(last_kernel->n_output_regs == 0)) {
1374
+ evoasm_log_info("program %p has no output", (void *) program);
1375
+ return INFINITY;
1376
+ }
1377
+
1378
+ evoasm_program_reset_recur_counters(program);
1379
+
1380
+ evoasm_signal_set_exception_mask(program->exception_mask);
1381
+
1382
+ #ifdef EVOASM_ENABLE_PARANOID_MODE
1383
+ for(size_t i = 0; i < program->size; i++) {
1384
+ evoasm_kernel_t *kernel = &program->kernels[i];
1385
+ for(size_t j = 0; j < EVOASM_X64_REG_NONE; j++) {
1386
+ kernel->rand_vals[j].i64 = rand() | (rand() << (rand() % 24));
1387
+ }
1388
+ }
1389
+ #endif
1390
+
1391
+ if(EVOASM_SIGNAL_TRY()) {
1392
+ evoasm_buf_exec(program->buf);
1393
+ loss = evoasm_program_assess(program, output);
1394
+ } else {
1395
+ evoasm_log_debug("program %p signaled", (void *) program);
1396
+ loss = INFINITY;
1397
+ }
1398
+
1399
+ evoasm_signal_clear_exception_mask();
1400
+
1401
+ return loss;
1402
+ }
1403
+
1404
+ evoasm_loss_t
1405
+ evoasm_program_eval(evoasm_program_t *program,
1406
+ evoasm_program_output_t *output) {
1407
+
1408
+ evoasm_loss_t loss = evoasm_program_eval_(program, output);
1409
+
1410
+ #ifdef EVOASM_ENABLE_PARANOID_MODE
1411
+ for(size_t i = 0; i < 10; i++) {
1412
+ evoasm_loss_t loss_ = evoasm_program_eval_(program, output);
1413
+
1414
+ if(loss_ != loss) {
1415
+ evoasm_program_log(program, EVOASM_LOG_LEVEL_WARN);
1416
+ evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_WARN);
1417
+ }
1418
+ assert(loss_ == loss);
1419
+ }
1420
+ #endif
1421
+
1422
+ return loss;
1423
+ }
1424
+
1425
+ static evoasm_program_output_t *
1426
+ evoasm_program_load_output(evoasm_program_t *program,
1427
+ evoasm_kernel_t *kernel,
1428
+ evoasm_program_input_t *input) {
1429
+
1430
+ size_t width = kernel->n_output_regs;
1431
+ evoasm_program_output_t *output = &program->_output;
1432
+ size_t height = output->arity;
1433
+ size_t n_tuples = EVOASM_PROGRAM_INPUT_N_TUPLES(input);
1434
+ uint_fast8_t *matching = evoasm_alloca(height * sizeof(uint_fast8_t));
1435
+
1436
+ evoasm_program_output_t *load_output = evoasm_program_io_alloc(
1437
+ (uint16_t) (EVOASM_PROGRAM_INPUT_N_TUPLES(input) * height));
1438
+
1439
+ for(size_t i = 0; i < height; i++) {
1440
+ for(size_t j = 0; j < kernel->n_output_regs; j++) {
1441
+ if(program->output_regs[i] == kernel->output_regs.x64[j]) {
1442
+ matching[i] = (uint_fast8_t) j;
1443
+ goto next;
1444
+ }
1445
+ }
1446
+ evoasm_log_fatal("program output reg %d not found in kernel output regs", program->output_regs[i]);
1447
+ evoasm_assert_not_reached();
1448
+ next:;
1449
+ }
1450
+
1451
+ for(size_t i = 0; i < n_tuples; i++) {
1452
+ for(size_t j = 0; j < height; j++) {
1453
+ load_output->vals[i * height + j] = program->output_vals[i * width + matching[j]];
1454
+ }
1455
+ }
1456
+
1457
+ load_output->arity = output->arity;
1458
+ memcpy(load_output->types, output->types, EVOASM_ARY_LEN(output->types));
1459
+
1460
+ #if EVOASM_LOG_LEVEL <= EVOASM_LOG_LEVEL_DEBUG
1461
+ evoasm_program_log_program_output(program,
1462
+ kernel,
1463
+ load_output,
1464
+ matching,
1465
+ EVOASM_LOG_LEVEL_DEBUG);
1466
+ #endif
1467
+ return load_output;
1468
+ }
1469
+
1470
+ evoasm_program_output_t *
1471
+ evoasm_program_run(evoasm_program_t *program,
1472
+ evoasm_program_input_t *input) {
1473
+ evoasm_kernel_t *kernel = &program->kernels[program->size - 1];
1474
+ evoasm_program_output_t *output;
1475
+
1476
+ if(input->arity != program->_input.arity) {
1477
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
1478
+ "arity mismatch (%d for %d)", input->arity, program->_input.arity);
1479
+ return NULL;
1480
+ }
1481
+
1482
+ size_t n_tuples = EVOASM_PROGRAM_INPUT_N_TUPLES(input);
1483
+ if(n_tuples > program->max_tuples) {
1484
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
1485
+ "Maximum number of input/output tuples exceeded (%zu > %d)", n_tuples, program->max_tuples);
1486
+ return NULL;
1487
+ }
1488
+
1489
+ for(size_t i = 0; i < input->arity; i++) {
1490
+ if(input->types[i] != program->_input.types[i]) {
1491
+ evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
1492
+ "type mismatch (%d != %d)", input->types[i], program->_input.types[i]);
1493
+ return NULL;
1494
+ }
1495
+ }
1496
+
1497
+ evoasm_program_emit_flags_t emit_flags = EVOASM_PROGRAM_EMIT_FLAG_EMIT_IO_LOAD_STORE;
1498
+ if(!evoasm_program_emit(program, input, emit_flags)) {
1499
+ return NULL;
1500
+ }
1501
+
1502
+ evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_DEBUG);
1503
+ evoasm_signal_set_exception_mask(program->exception_mask);
1504
+
1505
+ if(!evoasm_buf_protect(program->buf, EVOASM_MPROT_MODE_RX)) {
1506
+ evoasm_assert_not_reached();
1507
+ }
1508
+
1509
+ evoasm_program_reset_recur_counters(program);
1510
+
1511
+ if(EVOASM_SIGNAL_TRY()) {
1512
+ evoasm_buf_exec(program->buf);
1513
+ output = evoasm_program_load_output(program,
1514
+ kernel,
1515
+ input);
1516
+ } else {
1517
+ evoasm_log_debug("signaled\n");
1518
+ output = NULL;
1519
+ }
1520
+
1521
+ if(!evoasm_buf_protect(program->buf, EVOASM_MPROT_MODE_RW)) {
1522
+ evoasm_assert_not_reached();
1523
+ }
1524
+
1525
+ evoasm_signal_clear_exception_mask();
1526
+
1527
+ return output;
1528
+ }
1529
+
1530
+ evoasm_success_t
1531
+ evoasm_program_emit(evoasm_program_t *program,
1532
+ evoasm_program_input_t *input,
1533
+ evoasm_program_emit_flags_t emit_flags) {
1534
+ switch(program->arch_info->id) {
1535
+ case EVOASM_ARCH_X64: {
1536
+ return evoasm_program_x64_emit(program, input,
1537
+ emit_flags);
1538
+ break;
1539
+ }
1540
+ default:
1541
+ evoasm_assert_not_reached();
1542
+ }
1543
+ }
1544
+
1545
+ static size_t
1546
+ evoasm_program_x64_find_writers_(evoasm_program_t *program, evoasm_kernel_t *kernel, evoasm_reg_id_t reg_id,
1547
+ size_t idx, size_t *writers) {
1548
+ size_t len = 0;
1549
+ for(int i = (int) idx; i >= 0; i--) {
1550
+ evoasm_x64_reg_liveness_t reg_liveness;
1551
+ evoasm_x64_reg_liveness_init(&reg_liveness);
1552
+
1553
+ if(evoasm_kernel_is_writing_inst_x64(kernel, (size_t) i, reg_id, &reg_liveness)) {
1554
+ writers[len++] = (size_t) i;
1555
+ }
1556
+ }
1557
+ return len;
1558
+ }
1559
+
1560
+ static size_t
1561
+ evoasm_program_x64_find_writers(evoasm_program_t *program, evoasm_kernel_t *kernel,
1562
+ evoasm_reg_id_t reg_id, size_t idx, size_t *writers) {
1563
+
1564
+ return evoasm_program_x64_find_writers_(program, kernel, reg_id, idx, writers);
1565
+ }
1566
+
1567
+ typedef struct {
1568
+ bool change;
1569
+ evoasm_bitmap1024_t inst_bitmaps[EVOASM_PROGRAM_MAX_SIZE];
1570
+ evoasm_bitmap256_t output_reg_bitmaps[EVOASM_PROGRAM_MAX_SIZE];
1571
+ } evoasm_program_intron_elimination_ctx;
1572
+
1573
+ static void
1574
+ evoasm_program_x64_mark_writers(evoasm_program_t *program, evoasm_kernel_t *kernel,
1575
+ evoasm_reg_id_t reg_id, size_t idx, evoasm_program_intron_elimination_ctx *ctx) {
1576
+ size_t writers[16];
1577
+
1578
+ size_t writers_len = evoasm_program_x64_find_writers(program, kernel, reg_id, idx, writers);
1579
+
1580
+ if(writers_len > 0) {
1581
+ for(size_t i = 0; i < writers_len; i++) {
1582
+ size_t writer_idx = writers[i];
1583
+ evoasm_bitmap_t *inst_bitmap = (evoasm_bitmap_t *) &ctx->inst_bitmaps[kernel->idx];
1584
+ if(evoasm_bitmap_get(inst_bitmap, writer_idx)) continue;
1585
+
1586
+ evoasm_x64_inst_t *x64_inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[writer_idx]);
1587
+ evoasm_bitmap_set(inst_bitmap, writer_idx);
1588
+ ctx->change = true;
1589
+
1590
+ for(size_t j = 0; j < x64_inst->n_operands; j++) {
1591
+ evoasm_x64_operand_t *op = &x64_inst->operands[j];
1592
+ evoasm_x64_reg_id_t op_reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, (uint16_t) writer_idx);
1593
+
1594
+ if(op->read) {
1595
+ if(writer_idx > 0) {
1596
+ evoasm_program_x64_mark_writers(program, kernel, op_reg_id, writer_idx - 1u, ctx);
1597
+ }
1598
+
1599
+ if(kernel->reg_info.x64.regs[op_reg_id].input) {
1600
+ for(int k = kernel->idx - 1; k >= 0; k--) {
1601
+ size_t trans_idx = SIZE_MAX;
1602
+ if(k + 1 == kernel->idx) {
1603
+ trans_idx = 0;
1604
+ } else if(evoasm_program_branch_kernel_idx(program, (size_t) k) == kernel->idx) {
1605
+ trans_idx = 1;
1606
+ };
1607
+
1608
+ if(trans_idx != SIZE_MAX) {
1609
+ evoasm_kernel_t *trans_kernel = &program->kernels[k];
1610
+
1611
+ for(size_t l = 0; l < EVOASM_X64_REG_NONE; l++) {
1612
+ if(trans_kernel->reg_info.x64.trans_regs[trans_idx][op_reg_id] == l) {
1613
+ evoasm_bitmap_set((evoasm_bitmap_t *) &ctx->output_reg_bitmaps[k], l);
1614
+ }
1615
+ }
1616
+ }
1617
+ }
1618
+ }
1619
+ }
1620
+ }
1621
+ }
1622
+ }
1623
+ }
1624
+
1625
+ static void
1626
+ evoasm_program_mark_writers(evoasm_program_t *program, evoasm_kernel_t *kernel,
1627
+ evoasm_reg_id_t reg_id, size_t index, evoasm_program_intron_elimination_ctx *ctx) {
1628
+ switch(program->arch_info->id) {
1629
+ case EVOASM_ARCH_X64: {
1630
+ evoasm_program_x64_mark_writers(program, kernel, reg_id, index, ctx);
1631
+ break;
1632
+ }
1633
+ default:
1634
+ evoasm_assert_not_reached();
1635
+ }
1636
+ }
1637
+
1638
+ static evoasm_success_t
1639
+ evoasm_program_mark_kernel(evoasm_program_t *program, evoasm_kernel_t *kernel,
1640
+ evoasm_program_intron_elimination_ctx *ctx) {
1641
+ for(size_t i = 0; i < EVOASM_X64_REG_NONE; i++) {
1642
+ evoasm_bitmap_t *bitmap = (evoasm_bitmap_t *) &ctx->output_reg_bitmaps[kernel->idx];
1643
+ if(evoasm_bitmap_get(bitmap, i)) {
1644
+ evoasm_program_mark_writers(program, kernel, (evoasm_reg_id_t) i, (size_t) (kernel->size - 1),
1645
+ ctx);
1646
+ }
1647
+ }
1648
+
1649
+ return true;
1650
+ }
1651
+
1652
+ evoasm_success_t
1653
+ evoasm_program_eliminate_introns(evoasm_program_t *program, evoasm_program_t *dst_program) {
1654
+ size_t last_kernel_idx = (size_t) (program->size - 1);
1655
+ evoasm_program_intron_elimination_ctx ctx = {0};
1656
+
1657
+ //evoasm_kernel_t *last_kernel = &program->kernels[last_kernel_idx];
1658
+
1659
+ EVOASM_TRY(error, evoasm_program_init,
1660
+ dst_program,
1661
+ program->arch_info,
1662
+ program->size,
1663
+ program->kernels[0].size,
1664
+ program->max_tuples,
1665
+ program->recur_limit,
1666
+ false);
1667
+
1668
+ evoasm_bitmap_t *output_bitmap = (evoasm_bitmap_t *) &ctx.output_reg_bitmaps[last_kernel_idx];
1669
+ for(size_t i = 0; i < program->_output.arity; i++) {
1670
+ evoasm_bitmap_set(output_bitmap, program->output_regs[i]);
1671
+ }
1672
+
1673
+ do {
1674
+ ctx.change = false;
1675
+ for(int i = (int) last_kernel_idx; i >= 0; i--) {
1676
+ EVOASM_TRY(error, evoasm_program_mark_kernel, program,
1677
+ &program->kernels[i], &ctx);
1678
+ }
1679
+ } while(ctx.change);
1680
+
1681
+ /* sweep */
1682
+ for(size_t i = 0; i <= last_kernel_idx; i++) {
1683
+ evoasm_kernel_t *kernel = &program->kernels[i];
1684
+ evoasm_kernel_t *dst_kernel = &dst_program->kernels[i];
1685
+ evoasm_bitmap_t *inst_bitmap = (evoasm_bitmap_t *) &ctx.inst_bitmaps[i];
1686
+
1687
+ size_t k = 0;
1688
+ for(size_t j = 0; j < kernel->size; j++) {
1689
+ if(evoasm_bitmap_get(inst_bitmap, j)) {
1690
+ dst_kernel->insts[k] = kernel->insts[j];
1691
+ dst_kernel->params.x64[k] = kernel->params.x64[j];
1692
+ k++;
1693
+ }
1694
+ }
1695
+
1696
+ if(dst_kernel != kernel) {
1697
+ dst_kernel->size = (uint16_t) k;
1698
+ dst_kernel->reg_info = kernel->reg_info;
1699
+ dst_kernel->output_regs = kernel->output_regs;
1700
+ dst_kernel->n_input_regs = kernel->n_input_regs;
1701
+ dst_kernel->n_output_regs = kernel->n_output_regs;
1702
+ }
1703
+ }
1704
+
1705
+ if(dst_program != program) {
1706
+ dst_program->_input = program->_input;
1707
+ dst_program->_output = program->_output;
1708
+ memcpy(dst_program->output_regs, program->output_regs, sizeof(program->output_regs));
1709
+ EVOASM_MEMCPY_N(dst_program->jmp_offs, program->jmp_offs, program->size);
1710
+ EVOASM_MEMCPY_N(dst_program->jmp_conds, program->jmp_conds, program->size);
1711
+ }
1712
+
1713
+ evoasm_program_emit_flags_t emit_flags =
1714
+ EVOASM_PROGRAM_EMIT_FLAG_PREPARE |
1715
+ EVOASM_PROGRAM_EMIT_FLAG_EMIT_KERNELS;
1716
+
1717
+ EVOASM_TRY(error, evoasm_program_emit, dst_program, NULL, emit_flags);
1718
+
1719
+ return true;
1720
+ error:
1721
+ return false;
1722
+ }
1723
+
1724
+
1725
/* Byte budgets used by evoasm_program_init() when sizing the code buffers. */

/* Added once to the body size before scaling by max_tuples. */
#define EVOASM_PROGRAM_PROLOG_EPILOG_SIZE UINT32_C(1024)
/* Reserved for each transition between consecutive kernels. */
#define EVOASM_PROGRAM_TRANSITION_SIZE UINT32_C(512)
1727
+
1728
+
1729
+ evoasm_success_t
1730
+ evoasm_program_init(evoasm_program_t *program,
1731
+ evoasm_arch_info_t *arch_info,
1732
+ size_t program_size,
1733
+ size_t kernel_size,
1734
+ size_t max_tuples,
1735
+ size_t recur_limit,
1736
+ bool shallow) {
1737
+
1738
+ static evoasm_program_t zero_program = {0};
1739
+ size_t n_transitions = program_size - 1u;
1740
+
1741
+ *program = zero_program;
1742
+ program->arch_info = arch_info;
1743
+ program->recur_limit = (uint32_t) recur_limit;
1744
+ program->shallow = shallow;
1745
+ program->size = (uint16_t) program_size;
1746
+ program->max_tuples = (uint16_t) max_tuples;
1747
+
1748
+ size_t body_buf_size =
1749
+ (size_t) (n_transitions * EVOASM_PROGRAM_TRANSITION_SIZE
1750
+ + program_size * kernel_size * program->arch_info->max_inst_len);
1751
+
1752
+ size_t buf_size = max_tuples * (body_buf_size + EVOASM_PROGRAM_PROLOG_EPILOG_SIZE);
1753
+
1754
+ EVOASM_TRY(error, evoasm_buf_init, &program->_buf, EVOASM_BUF_TYPE_MMAP, buf_size);
1755
+ program->buf = &program->_buf;
1756
+
1757
+ EVOASM_TRY(error, evoasm_buf_init, &program->_body_buf, EVOASM_BUF_TYPE_MALLOC, body_buf_size);
1758
+ program->body_buf = &program->_body_buf;
1759
+
1760
+ EVOASM_TRY(error, evoasm_buf_protect, &program->_buf,
1761
+ EVOASM_MPROT_MODE_RWX);
1762
+
1763
+ size_t output_vals_len = max_tuples * EVOASM_KERNEL_MAX_OUTPUT_REGS;
1764
+
1765
+ EVOASM_TRY_ALLOC(error, calloc, program->output_vals, output_vals_len, sizeof(evoasm_program_io_val_t));
1766
+ EVOASM_TRY_ALLOC(error, calloc, program->kernels, program_size, sizeof(evoasm_kernel_t));
1767
+ EVOASM_TRY_ALLOC(error, calloc, program->recur_counters, program_size, sizeof(uint32_t));
1768
+ EVOASM_TRY_ALLOC(error, calloc, program->jmp_conds, program_size, sizeof(uint8_t));
1769
+ EVOASM_TRY_ALLOC(error, calloc, program->jmp_offs, program_size, sizeof(int16_t));
1770
+
1771
+ for(uint16_t i = 0; i < program_size; i++) {
1772
+ evoasm_kernel_t *kernel = &program->kernels[i];
1773
+
1774
+ kernel->idx = i;
1775
+ kernel->size = (uint16_t) kernel_size;
1776
+
1777
+ if(!shallow) {
1778
+ EVOASM_TRY_ALLOC(error, calloc, kernel->insts, kernel_size, sizeof(kernel->insts[0]));
1779
+ switch(program->arch_info->id) {
1780
+ case EVOASM_ARCH_X64: {
1781
+ EVOASM_TRY_ALLOC(error, calloc, kernel->params.x64, kernel_size, sizeof(kernel->params.x64[0]));
1782
+ break;
1783
+ }
1784
+ default:
1785
+ evoasm_assert_not_reached();
1786
+ }
1787
+ }
1788
+ }
1789
+
1790
+
1791
+ return true;
1792
+
1793
+ error:
1794
+ EVOASM_TRY_WARN(evoasm_program_destroy, program);
1795
+ return false;
1796
+ }
1797
+
1798
+ void
1799
+ evoasm_kernel_log(evoasm_kernel_t *kernel, evoasm_arch_id_t arch_id, evoasm_log_level_t log_level) {
1800
+ if(_evoasm_log_level > log_level) return;
1801
+
1802
+ switch(arch_id) {
1803
+ case EVOASM_ARCH_X64:
1804
+ for(size_t i = 0; i < kernel->size; i++) {
1805
+ evoasm_x64_inst_t *inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[i]);
1806
+ const char *mnem = evoasm_x64_inst_get_mnem(inst);
1807
+ evoasm_log(log_level, EVOASM_LOG_TAG, "%s", mnem);
1808
+ }
1809
+ break;
1810
+ default:
1811
+ evoasm_assert_not_reached();
1812
+ }
1813
+ }
1814
+
1815
+ void
1816
+ evoasm_program_log(evoasm_program_t *program, evoasm_log_level_t log_level) {
1817
+ if(_evoasm_log_level > log_level) return;
1818
+
1819
+ evoasm_log(log_level, EVOASM_LOG_TAG, "Evoasm::Program: size: %d", program->size);
1820
+
1821
+ for(size_t i = 0; i < program->size; i++) {
1822
+ evoasm_kernel_log(&program->kernels[i], (evoasm_arch_id_t) program->arch_info->id, log_level);
1823
+ }
1824
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
1825
+ }
1826
+
1827
+ EVOASM_DEF_ALLOC_FREE_FUNCS(program)