evoasm 0.0.2.pre7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +7 -0
  2. data/.gemrelease +2 -0
  3. data/.gitignore +16 -0
  4. data/Gemfile +4 -0
  5. data/Gemfile.rake +8 -0
  6. data/Gemfile.rake.lock +51 -0
  7. data/LICENSE.txt +373 -0
  8. data/Makefile +6 -0
  9. data/README.md +43 -0
  10. data/Rakefile +128 -0
  11. data/bin/gdb +2 -0
  12. data/data/tables/README.md +19 -0
  13. data/data/tables/x64.csv +1684 -0
  14. data/data/templates/evoasm-x64.c.erb +319 -0
  15. data/data/templates/evoasm-x64.h.erb +126 -0
  16. data/evoasm.gemspec +30 -0
  17. data/examples/abs.yml +20 -0
  18. data/examples/popcnt.yml +17 -0
  19. data/examples/sym_reg.yml +26 -0
  20. data/exe/evoasm-search +13 -0
  21. data/ext/evoasm_ext/evoasm-alloc.c +145 -0
  22. data/ext/evoasm_ext/evoasm-alloc.h +59 -0
  23. data/ext/evoasm_ext/evoasm-arch.c +44 -0
  24. data/ext/evoasm_ext/evoasm-arch.h +161 -0
  25. data/ext/evoasm_ext/evoasm-bitmap.h +114 -0
  26. data/ext/evoasm_ext/evoasm-buf.c +130 -0
  27. data/ext/evoasm_ext/evoasm-buf.h +47 -0
  28. data/ext/evoasm_ext/evoasm-error.c +31 -0
  29. data/ext/evoasm_ext/evoasm-error.h +75 -0
  30. data/ext/evoasm_ext/evoasm-free-list.c.tmpl +121 -0
  31. data/ext/evoasm_ext/evoasm-free-list.h.tmpl +86 -0
  32. data/ext/evoasm_ext/evoasm-log.c +108 -0
  33. data/ext/evoasm_ext/evoasm-log.h +69 -0
  34. data/ext/evoasm_ext/evoasm-misc.c +23 -0
  35. data/ext/evoasm_ext/evoasm-misc.h +282 -0
  36. data/ext/evoasm_ext/evoasm-param.h +37 -0
  37. data/ext/evoasm_ext/evoasm-search.c +2145 -0
  38. data/ext/evoasm_ext/evoasm-search.h +214 -0
  39. data/ext/evoasm_ext/evoasm-util.h +40 -0
  40. data/ext/evoasm_ext/evoasm-x64.c +275624 -0
  41. data/ext/evoasm_ext/evoasm-x64.h +5436 -0
  42. data/ext/evoasm_ext/evoasm.c +7 -0
  43. data/ext/evoasm_ext/evoasm.h +23 -0
  44. data/ext/evoasm_ext/evoasm_ext.c +1757 -0
  45. data/ext/evoasm_ext/extconf.rb +31 -0
  46. data/lib/evoasm/cli/search.rb +127 -0
  47. data/lib/evoasm/cli.rb +6 -0
  48. data/lib/evoasm/core_ext/array.rb +9 -0
  49. data/lib/evoasm/core_ext/integer.rb +10 -0
  50. data/lib/evoasm/core_ext/kwstruct.rb +13 -0
  51. data/lib/evoasm/core_ext/range.rb +5 -0
  52. data/lib/evoasm/core_ext.rb +1 -0
  53. data/lib/evoasm/error.rb +20 -0
  54. data/lib/evoasm/examples.rb +27 -0
  55. data/lib/evoasm/gen/enum.rb +169 -0
  56. data/lib/evoasm/gen/name_util.rb +80 -0
  57. data/lib/evoasm/gen/state.rb +176 -0
  58. data/lib/evoasm/gen/state_dsl.rb +152 -0
  59. data/lib/evoasm/gen/strio.rb +27 -0
  60. data/lib/evoasm/gen/translator.rb +1102 -0
  61. data/lib/evoasm/gen/version.rb +5 -0
  62. data/lib/evoasm/gen/x64/funcs.rb +495 -0
  63. data/lib/evoasm/gen/x64/inst.rb +781 -0
  64. data/lib/evoasm/gen/x64.rb +237 -0
  65. data/lib/evoasm/gen.rb +8 -0
  66. data/lib/evoasm/program.rb +23 -0
  67. data/lib/evoasm/search.rb +40 -0
  68. data/lib/evoasm/tasks/gen_task.rb +86 -0
  69. data/lib/evoasm/tasks/template_task.rb +52 -0
  70. data/lib/evoasm/version.rb +3 -0
  71. data/lib/evoasm.rb +22 -0
  72. data/test/test_helper.rb +1 -0
  73. data/test/x64/test_helper.rb +19 -0
  74. data/test/x64/x64_test.rb +87 -0
  75. metadata +221 -0
@@ -0,0 +1,2145 @@
1
+ #define _DEFAULT_SOURCE
2
+
3
+ #include "evoasm-search.h"
4
+ #include "evoasm-error.h"
5
+ #include <stdalign.h>
6
+
7
+ #if 0
8
+ #ifdef __STDC_NO_THREADS__
9
+ #include "tinycthread.h"
10
+ #else
11
+ #include <threads.h>
12
+ #endif
13
+ #endif
14
+
15
+ EVOASM_DECL_LOG_TAG("search")
16
+
17
/*
 * Layout helpers for the flat population storage.
 *
 * A program record is an evoasm_program_params header followed by
 * max_program_size kernel records; a kernel record is an
 * evoasm_kernel_params header followed by max_kernel_size
 * evoasm_kernel_param slots. All records are sized for the maximum, so
 * records can be addressed by plain byte offsets.
 */

/* Byte size of one kernel record. */
#define _EVOASM_KERNEL_SIZE(max_kernel_size) \
  (sizeof(evoasm_kernel_params) + \
   (max_kernel_size) * sizeof(evoasm_kernel_param))

/* Byte size of one program record. */
#define _EVOASM_PROGRAM_SIZE(max_program_size, max_kernel_size) \
  (sizeof(evoasm_program_params) + \
   (max_program_size) * _EVOASM_KERNEL_SIZE(max_kernel_size))

/* Address of the program record `program_index` inside the byte array `programs`.
 * `search` is parenthesized so that any pointer expression can be passed. */
#define _EVOASM_SEARCH_PROGRAM_PARAMS(search, programs, program_index) \
  ((evoasm_program_params *)((unsigned char *)(programs) + \
    (program_index) * _EVOASM_PROGRAM_SIZE((search)->params.max_program_size, (search)->params.max_kernel_size)))

/* Address of the kernel record `kernel_index` inside a program record. */
#define _EVOASM_PROGRAM_PARAMS_KERNEL_PARAMS(program_params, max_kernel_size, kernel_index) \
  ((evoasm_kernel_params *)((unsigned char *)(program_params) + \
    sizeof(evoasm_program_params) + (kernel_index) * _EVOASM_KERNEL_SIZE(max_kernel_size)))

/* Byte size of the output value table: one evoasm_example_val per
 * (I/O entry, possible kernel output register) pair. */
#define EVOASM_PROGRAM_OUTPUT_VALS_SIZE(io) \
  ((size_t)EVOASM_PROGRAM_IO_N(io) * \
   (size_t)EVOASM_KERNEL_MAX_OUTPUT_REGS * \
   sizeof(evoasm_example_val))
35
+
36
+ #if (defined(__unix__) || defined(__unix) ||\
37
+ (defined(__APPLE__) && defined(__MACH__)))
38
+
39
+ #define EVOASM_SEARCH_PROLOG_EPILOG_SIZE UINT32_C(1024)
40
+
41
+ #include <setjmp.h>
42
+ #include <stdio.h>
43
+ #include <signal.h>
44
+ #include <stdatomic.h>
45
+
46
/* Evaluates to true on the direct call; evaluates to false when control comes
 * back here through siglongjmp() from the signal handler. The signal mask is
 * saved (second sigsetjmp argument is 1). */
#define _EVOASM_SIGNAL_CONTEXT_TRY(signal_ctx) (sigsetjmp((signal_ctx)->env, 1) == 0)

/* Non-zero when bit `exc` is set in the current thread's exception mask.
 * The shift is done on an unsigned 32-bit one and `exc` is parenthesized so
 * that compound expressions and bit 31 are handled correctly. */
#define _EVOASM_SEARCH_EXCEPTION_SET_P(exc) (_evoasm_signal_ctx->exception_mask & (UINT32_C(1) << (exc)))
48
+
49
+ struct evoasm_signal_context {
50
+ uint32_t exception_mask;
51
+ sigjmp_buf env;
52
+ struct sigaction prev_action;
53
+ evoasm_arch_id arch_id;
54
+ };
55
+
56
+
57
+ _Thread_local volatile struct evoasm_signal_context *_evoasm_signal_ctx;
58
+
59
+ static void
60
+ _evoasm_signal_handler(int sig, siginfo_t *siginfo, void *ctx) {
61
+ bool handle = false;
62
+
63
+ atomic_signal_fence(memory_order_acquire);
64
+
65
+ switch(_evoasm_signal_ctx->arch_id) {
66
+ case EVOASM_ARCH_X64: {
67
+ switch(sig) {
68
+ case SIGFPE: {
69
+ bool catch_div_by_zero = siginfo->si_code == FPE_INTDIV &&
70
+ _EVOASM_SEARCH_EXCEPTION_SET_P(EVOASM_X64_EXCEPTION_DE);
71
+ handle = catch_div_by_zero;
72
+ break;
73
+ }
74
+ default:
75
+ break;
76
+ }
77
+ break;
78
+ }
79
+ default: evoasm_assert_not_reached();
80
+ }
81
+
82
+ if(handle) {
83
+ siglongjmp(*((jmp_buf *)&_evoasm_signal_ctx->env), 1);
84
+ } else {
85
+ raise(sig);
86
+ }
87
+ }
88
+
89
+ static void
90
+ evoasm_signal_context_install(struct evoasm_signal_context *signal_ctx, evoasm_arch *arch) {
91
+ struct sigaction action = {0};
92
+
93
+ signal_ctx->arch_id = arch->cls->id;
94
+
95
+ action.sa_sigaction = _evoasm_signal_handler;
96
+ sigemptyset(&action.sa_mask);
97
+ action.sa_flags = SA_SIGINFO;
98
+
99
+ if(sigaction(SIGFPE, &action, &signal_ctx->prev_action) < 0) {
100
+ perror("sigaction");
101
+ exit(1);
102
+ }
103
+
104
+ _evoasm_signal_ctx = signal_ctx;
105
+ atomic_signal_fence(memory_order_release);
106
+ }
107
+
108
+ static void
109
+ evoasm_signal_context_uninstall(struct evoasm_signal_context *signal_ctx) {
110
+ if(sigaction(SIGFPE, &signal_ctx->prev_action, NULL) < 0) {
111
+ perror("sigaction");
112
+ exit(1);
113
+ }
114
+ }
115
+
116
+ #else
117
+ #error
118
+ #endif
119
+
120
+ static inline double
121
+ evoasm_example_val_to_dbl(evoasm_example_val example_val, evoasm_example_type example_type) {
122
+ switch(example_type) {
123
+ case EVOASM_EXAMPLE_TYPE_F64:
124
+ return example_val.f64;
125
+ case EVOASM_EXAMPLE_TYPE_I64:
126
+ return (double) example_val.i64;
127
+ default:
128
+ evoasm_fatal("unsupported example type %d", example_type);
129
+ evoasm_assert_not_reached();
130
+ }
131
+ }
132
+
133
+ static bool
134
+ _evoasm_population_destroy(evoasm_population *pop, bool free_buf, bool free_body_buf) {
135
+ bool retval = true;
136
+
137
+ evoasm_prng64_destroy(&pop->prng64);
138
+ evoasm_prng32_destroy(&pop->prng32);
139
+ evoasm_free(pop->programs);
140
+ evoasm_free(pop->losses);
141
+ evoasm_free(pop->output_vals);
142
+ evoasm_free(pop->matching);
143
+
144
+ if(free_buf) EVOASM_TRY(buf_free_failed, evoasm_buf_destroy, &pop->buf);
145
+
146
+ cleanup:
147
+ if(free_body_buf) EVOASM_TRY(body_buf_failed, evoasm_buf_destroy, &pop->body_buf);
148
+ return retval;
149
+
150
+ buf_free_failed:
151
+ retval = false;
152
+ goto cleanup;
153
+
154
+ body_buf_failed:
155
+ return false;
156
+ }
157
+
158
+ static evoasm_success
159
+ evoasm_population_init(evoasm_population *pop, evoasm_search *search) {
160
+ uint32_t pop_size = search->params.pop_size;
161
+ unsigned i;
162
+
163
+ size_t body_buf_size = (size_t) (search->params.max_program_size * search->params.max_kernel_size * search->arch->cls->max_inst_len);
164
+ size_t buf_size = EVOASM_PROGRAM_INPUT_N(&search->params.program_input) * (body_buf_size + EVOASM_SEARCH_PROLOG_EPILOG_SIZE);
165
+
166
+ static evoasm_population zero_pop = {0};
167
+ *pop = zero_pop;
168
+
169
+ size_t program_size = _EVOASM_PROGRAM_SIZE(search->params.max_program_size, search->params.max_kernel_size);
170
+
171
+ pop->programs = evoasm_calloc(3 * pop_size, program_size);
172
+ pop->programs_main = pop->programs;
173
+ pop->programs_swap = pop->programs + 1 * search->params.pop_size * program_size;
174
+ pop->programs_aux = pop->programs + 2 * search->params.pop_size * program_size;
175
+
176
+ pop->output_vals = evoasm_malloc(EVOASM_PROGRAM_OUTPUT_VALS_SIZE(&search->params.program_input));
177
+ pop->matching = evoasm_malloc(search->params.program_output.arity * sizeof(uint_fast8_t));
178
+
179
+ pop->losses = (evoasm_loss *) evoasm_calloc(pop_size, sizeof(evoasm_loss));
180
+ for(i = 0; i < EVOASM_SEARCH_ELITE_SIZE; i++) {
181
+ pop->elite[i] = UINT32_MAX;
182
+ }
183
+ pop->elite_pos = 0;
184
+ pop->best_loss = INFINITY;
185
+
186
+ evoasm_prng64_init(&pop->prng64, &search->params.seed64);
187
+ evoasm_prng32_init(&pop->prng32, &search->params.seed32);
188
+
189
+ EVOASM_TRY(buf_alloc_failed, evoasm_buf_init, &pop->buf, EVOASM_BUF_TYPE_MMAP, buf_size);
190
+ EVOASM_TRY(body_buf_alloc_failed, evoasm_buf_init, &pop->body_buf, EVOASM_BUF_TYPE_MALLOC, body_buf_size);
191
+
192
+ EVOASM_TRY(prot_failed, evoasm_buf_protect, &pop->buf,
193
+ EVOASM_MPROT_RWX);
194
+
195
+ return true;
196
+
197
+ buf_alloc_failed:
198
+ _evoasm_population_destroy(pop, false, false);
199
+ return false;
200
+
201
+ body_buf_alloc_failed:
202
+ _evoasm_population_destroy(pop, true, false);
203
+ return false;
204
+
205
+ prot_failed:
206
+ _evoasm_population_destroy(pop, true, true);
207
+ return false;
208
+ }
209
+
210
+ static evoasm_success
211
+ evoasm_population_destroy(evoasm_population *pop) {
212
+ return _evoasm_population_destroy(pop, true, true);
213
+ }
214
+
215
+ #define EVOASM_SEARCH_X64_REG_TMP EVOASM_X64_REG_14
216
+
217
+
218
+ static evoasm_success
219
+ evoasm_program_x64_emit_output_store(evoasm_program *program,
220
+ unsigned example_index) {
221
+ evoasm_arch *arch = program->arch;
222
+ evoasm_x64 *x64 = (evoasm_x64 *) arch;
223
+ evoasm_x64_params params = {0};
224
+ evoasm_kernel *kernel = &program->kernels[program->params->size - 1];
225
+ unsigned i;
226
+
227
+ for(i = 0; i < kernel->n_output_regs; i++) {
228
+ evoasm_x64_reg_id reg_id = kernel->output_regs.x64[i];
229
+ evoasm_example_val *val_addr = &program->output_vals[(example_index * kernel->n_output_regs) + i];
230
+ enum evoasm_x64_reg_type reg_type = evoasm_x64_reg_type(reg_id);
231
+
232
+ evoasm_arch_param_val addr_imm = (evoasm_arch_param_val)(uintptr_t) val_addr;
233
+
234
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_SEARCH_X64_REG_TMP);
235
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, addr_imm);
236
+ EVOASM_X64_ENC(mov_r64_imm64);
237
+ evoasm_arch_save(arch, program->buf);
238
+
239
+ switch(reg_type) {
240
+ case EVOASM_X64_REG_TYPE_GP: {
241
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, reg_id);
242
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_SEARCH_X64_REG_TMP);
243
+ EVOASM_X64_ENC(mov_rm64_r64);
244
+ evoasm_arch_save(arch, program->buf);
245
+ break;
246
+ }
247
+ case EVOASM_X64_REG_TYPE_XMM: {
248
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, reg_id);
249
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_SEARCH_X64_REG_TMP);
250
+ EVOASM_X64_ENC(movsd_xmmm64_xmm);
251
+ evoasm_arch_save(arch, program->buf);
252
+ break;
253
+ }
254
+ default: {
255
+ evoasm_assert_not_reached();
256
+ }
257
+ }
258
+ }
259
+
260
+ return true;
261
+
262
+ enc_failed:
263
+ return false;
264
+ }
265
+
266
+ static void
267
+ evoasm_search_seed_kernel_param(evoasm_search *search, evoasm_kernel_param *kernel_param) {
268
+ unsigned i;
269
+ int64_t inst_idx = evoasm_prng64_rand_between(&search->pop.prng64, 0, search->params.insts_len - 1);
270
+ evoasm_inst *inst = search->params.insts[inst_idx];
271
+
272
+ kernel_param->inst = inst;
273
+
274
+ /* set parameters */
275
+ for(i = 0; i < search->params.params_len; i++) {
276
+ evoasm_domain *domain = &search->domains[inst_idx * search->params.params_len + i];
277
+ if(domain->type < EVOASM_N_DOMAIN_TYPES) {
278
+ evoasm_arch_param_id param_id = search->params.params[i];
279
+ evoasm_arch_param_val param_val;
280
+
281
+ param_val = (evoasm_arch_param_val) evoasm_domain_rand(domain, &search->pop.prng64);
282
+ evoasm_arch_params_set(
283
+ kernel_param->param_vals,
284
+ (evoasm_bitmap *) &kernel_param->set_params,
285
+ param_id,
286
+ param_val
287
+ );
288
+ }
289
+ }
290
+ }
291
+
292
+
293
+ static void
294
+ evoasm_search_seed_kernel(evoasm_search *search, evoasm_kernel_params *kernel_params,
295
+ evoasm_program_size program_size) {
296
+ unsigned i;
297
+
298
+ evoasm_kernel_size kernel_size = (evoasm_kernel_size) evoasm_prng32_rand_between(&search->pop.prng32,
299
+ search->params.min_kernel_size, search->params.max_kernel_size);
300
+
301
+ assert(kernel_size > 0);
302
+ kernel_params->size = kernel_size;
303
+ kernel_params->jmp_selector = (uint8_t) evoasm_prng32_rand_between(&search->pop.prng32, 0, UINT8_MAX);
304
+ kernel_params->branch_kernel_idx = (evoasm_kernel_size)
305
+ evoasm_prng32_rand_between(&search->pop.prng32, 0, program_size - 1);
306
+
307
+ for(i = 0; i < kernel_size; i++) {
308
+ evoasm_search_seed_kernel_param(search, &kernel_params->params[i]);
309
+ }
310
+ }
311
+
312
+
313
+ static void
314
+ evoasm_search_seed_program(evoasm_search *search, unsigned char *programs, unsigned program_index) {
315
+ unsigned i;
316
+
317
+ evoasm_program_params *program_params = _EVOASM_SEARCH_PROGRAM_PARAMS(search, programs, program_index);
318
+ evoasm_program_size program_size = (evoasm_program_size) evoasm_prng64_rand_between(&search->pop.prng64,
319
+ search->params.min_program_size, search->params.max_program_size);
320
+
321
+ assert(program_size > 0);
322
+ program_params->size = program_size;
323
+
324
+ for(i = 0; i < program_size; i++) {
325
+ evoasm_kernel_params *kernel_params = _EVOASM_PROGRAM_PARAMS_KERNEL_PARAMS(program_params, search->params.max_kernel_size, i);
326
+ evoasm_search_seed_kernel(search, kernel_params, program_size);
327
+ }
328
+
329
+ }
330
+
331
+
332
+ static void
333
+ evoasm_search_seed(evoasm_search *search, unsigned char *programs) {
334
+ unsigned i;
335
+
336
+ for(i = 0; i < search->params.pop_size; i++) {
337
+ evoasm_search_seed_program(search, programs, i);
338
+ }
339
+ }
340
+
341
+
342
+ static evoasm_success
343
+ evoasm_program_x64_emit_rflags_reset(evoasm_program *program) {
344
+ evoasm_x64 *x64 = (evoasm_x64 *) program->arch;
345
+ evoasm_x64_params params = {0};
346
+
347
+ evoasm_debug("emitting RFLAGS reset");
348
+ EVOASM_X64_ENC(pushfq);
349
+ evoasm_arch_save(program->arch, program->buf);
350
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_X64_REG_SP);
351
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM, 0);
352
+ EVOASM_X64_ENC(mov_rm64_imm32);
353
+ evoasm_arch_save(program->arch, program->buf);
354
+ EVOASM_X64_ENC(popfq);
355
+ evoasm_arch_save(program->arch, program->buf);
356
+
357
+ return true;
358
+ enc_failed:
359
+ return false;
360
+ }
361
+
362
+ static evoasm_success
363
+ evoasm_search_x64_emit_mxcsr_reset(evoasm_search *search, evoasm_buf *buf) {
364
+ evoasm_arch *arch = search->arch;
365
+ static uint32_t default_mxcsr_val = 0x1f80;
366
+ evoasm_x64 *x64 = (evoasm_x64 *) arch;
367
+ evoasm_x64_params params = {0};
368
+ evoasm_arch_param_val addr_imm = (evoasm_arch_param_val)(uintptr_t) &default_mxcsr_val;
369
+
370
+ evoasm_x64_reg_id reg_tmp0 = EVOASM_X64_REG_14;
371
+
372
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, reg_tmp0);
373
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, addr_imm);
374
+ EVOASM_X64_ENC(mov_r32_imm32);
375
+ evoasm_arch_save(arch, buf);
376
+
377
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, reg_tmp0);
378
+ EVOASM_X64_ENC(ldmxcsr_m32);
379
+ evoasm_arch_save(arch, buf);
380
+
381
+ return true;
382
+ enc_failed:
383
+ return false;
384
+ }
385
+
386
+
387
+ static evoasm_x64_reg_id
388
+ evoasm_op_x64_reg_id(evoasm_x64_operand *op, evoasm_kernel_param *param) {
389
+ evoasm_inst *inst = param->inst;
390
+
391
+ if(op->param_idx < inst->params_len) {
392
+ return (evoasm_x64_reg_id) param->param_vals[inst->params[op->param_idx].id];
393
+ } else if(op->reg_id < EVOASM_X64_N_REGS) {
394
+ return op->reg_id;
395
+ } else {
396
+ evoasm_assert_not_reached();
397
+ return 0;
398
+ }
399
+ }
400
+
401
/* Accumulated description of the writes made to one register so far
 * within a kernel. */
typedef struct {
  /* OR of evoasm_kernel_param_x64_l8() over the writing instructions */
  bool l8 : 1;
  /* OR of the write masks (acc_w_mask) of the writing operands */
  unsigned mask;
  /* largest operand size written so far */
  unsigned size;
} evoasm_x64_reg_modif_acc;
406
+
407
+ static void
408
+ evoasm_program_unprepare_kernel(evoasm_program *program, evoasm_kernel *kernel) {
409
+ kernel->n_input_regs = 0;
410
+ kernel->n_output_regs = 0;
411
+
412
+ static evoasm_kernel_reg_info zero_reg_info = {0};
413
+ kernel->reg_info = zero_reg_info;
414
+ }
415
+
416
+ static void
417
+ evoasm_program_unprepare(evoasm_program *program) {
418
+ unsigned i;
419
+ for(i = 0; i < program->params->size; i++) {
420
+ evoasm_program_unprepare_kernel(program, &program->kernels[i]);
421
+ }
422
+ }
423
+
424
+ static bool
425
+ evoasm_kernel_param_x64_l8(evoasm_kernel_param *param) {
426
+ return param->param_vals[EVOASM_X64_PARAM_REX_B] ||
427
+ param->param_vals[EVOASM_X64_PARAM_REX_R] ||
428
+ param->param_vals[EVOASM_X64_PARAM_REX_W] ||
429
+ param->param_vals[EVOASM_X64_PARAM_REX_X];
430
+ }
431
+
432
+ static void
433
+ evoasm_x64_reg_modif_acc_update(evoasm_x64_reg_modif_acc *reg_modif_acc,
434
+ evoasm_x64_operand *op, evoasm_kernel_param *param) {
435
+ reg_modif_acc->size = EVOASM_MAX(reg_modif_acc->size, op->size);
436
+ reg_modif_acc->mask |= op->acc_w_mask;
437
+ reg_modif_acc->l8 |= evoasm_kernel_param_x64_l8(param);
438
+ }
439
+
440
+
441
+ static bool
442
+ evoasm_x64_reg_modif_acc_uncovered_access(evoasm_x64_reg_modif_acc *reg_modif_acc, evoasm_x64_operand *op,
443
+ evoasm_kernel_param *param) {
444
+ bool uncovered_acc;
445
+ bool l8 = evoasm_kernel_param_x64_l8(param);
446
+
447
+ if(op->reg_type == EVOASM_X64_REG_TYPE_GP) {
448
+ if(op->size == EVOASM_OPERAND_SIZE_8) {
449
+ uncovered_acc = l8 != reg_modif_acc->l8;
450
+ } else if(op->size == EVOASM_OPERAND_SIZE_16) {
451
+ uncovered_acc = reg_modif_acc->size < EVOASM_OPERAND_SIZE_16;
452
+ } else {
453
+ uncovered_acc = false;
454
+ }
455
+ }
456
+ else if(op->reg_type == EVOASM_X64_REG_TYPE_XMM) {
457
+ unsigned mask;
458
+ if(op->size == EVOASM_OPERAND_SIZE_128) {
459
+ mask = EVOASM_X64_BIT_MASK_0_127;
460
+ } else {
461
+ mask = EVOASM_X64_BIT_MASK_ALL;
462
+ }
463
+ uncovered_acc = ((mask & (~reg_modif_acc->mask)) != 0);
464
+ } else {
465
+ uncovered_acc = false;
466
+ }
467
+
468
+ return uncovered_acc;
469
+ }
470
+
471
+
472
+
473
+ static void
474
+ evoasm_program_x64_prepare_kernel(evoasm_program *program, evoasm_kernel *kernel) {
475
+ unsigned i, j;
476
+
477
+ //kernel->n_input_regs = 0;
478
+ //kernel->n_output_regs = 0;
479
+
480
+ /* NOTE: output register are register that are written to
481
+ * input registers are register that are read from without
482
+ * a previous write
483
+ */
484
+ evoasm_kernel_params *kernel_params = kernel->params;
485
+
486
+ evoasm_x64_reg_modif_acc reg_modif_accs[EVOASM_X64_N_REGS] = {0};
487
+
488
+ for(i = 0; i < kernel_params->size; i++) {
489
+ evoasm_kernel_param *param = &kernel_params->params[i];
490
+ evoasm_x64_inst *x64_inst = (evoasm_x64_inst *) param->inst;
491
+
492
+ for(j = 0; j < x64_inst->n_operands; j++) {
493
+ evoasm_x64_operand *op = &x64_inst->operands[j];
494
+
495
+ if(op->type == EVOASM_X64_OPERAND_TYPE_REG ||
496
+ op->type == EVOASM_X64_OPERAND_TYPE_RM) {
497
+ evoasm_x64_reg_id reg_id;
498
+
499
+ if(op->reg_type == EVOASM_X64_REG_TYPE_RFLAGS) {
500
+ if(op->acc_r) {
501
+ program->reset_rflags = true;
502
+ } else if(op->acc_w) {
503
+ kernel->reg_info.x64[op->reg_id].written = true;
504
+ }
505
+ }
506
+ else {
507
+ reg_id = evoasm_op_x64_reg_id(op, param);
508
+ evoasm_kernel_x64_reg_info *reg_info = &kernel->reg_info.x64[reg_id];
509
+ evoasm_x64_reg_modif_acc *reg_modif_acc = &reg_modif_accs[reg_id];
510
+
511
+ /*
512
+ * Conditional writes (acc_c) might or might not do the write.
513
+ */
514
+
515
+ if(op->acc_r || op->acc_c) {
516
+ if(!reg_info->input) {
517
+ // has not been written before, might contain garbage
518
+ bool dirty_read;
519
+
520
+ if(!reg_info->written) {
521
+ dirty_read = true;
522
+ } else {
523
+ dirty_read = evoasm_x64_reg_modif_acc_uncovered_access(reg_modif_acc, op, param);
524
+ }
525
+
526
+ if(dirty_read) {
527
+ reg_info->input = true;
528
+ kernel->n_input_regs++;
529
+ }
530
+ }
531
+ }
532
+
533
+ if(op->acc_w) {
534
+ // ???
535
+ //evoasm_operand_size reg_size = (evoasm_operand_size) EVOASM_MIN(output_sizes[program->n_output_regs],
536
+ // op->acc_c ? EVOASM_N_OPERAND_SIZES : op->size);
537
+
538
+ if(!reg_info->written) {
539
+ reg_info->written = true;
540
+ reg_info->output = true;
541
+ kernel->output_regs.x64[kernel->n_output_regs] = reg_id;
542
+ kernel->n_output_regs++;
543
+ }
544
+
545
+ evoasm_x64_reg_modif_acc_update(reg_modif_acc, op, param);
546
+ }
547
+ }
548
+ }
549
+ }
550
+ }
551
+
552
+ assert(kernel->n_output_regs <= EVOASM_KERNEL_MAX_OUTPUT_REGS);
553
+ assert(kernel->n_input_regs <= EVOASM_KERNEL_MAX_INPUT_REGS);
554
+ }
555
+
556
+ static void
557
+ evoasm_program_x64_prepare(evoasm_program *program) {
558
+ unsigned i;
559
+ for(i = 0; i < program->params->size; i++) {
560
+ evoasm_kernel *kernel = &program->kernels[i];
561
+ evoasm_program_x64_prepare_kernel(program, kernel);
562
+ }
563
+
564
+ }
565
+
566
+ static evoasm_success
567
+ evoasm_program_x64_emit_input_load(evoasm_program *program,
568
+ evoasm_example_val *input_vals,
569
+ evoasm_example_type *types,
570
+ unsigned in_arity,
571
+ bool set_io_mapping) {
572
+
573
+
574
+ evoasm_x64 *x64 = (evoasm_x64 *) program->arch;
575
+ evoasm_example_val *loaded_example = NULL;
576
+ evoasm_kernel *kernel = &program->kernels[0];
577
+
578
+ evoasm_x64_reg_id input_reg_id;
579
+ unsigned input_reg_idx;
580
+
581
+ evoasm_debug("n input regs %d", kernel->n_input_regs);
582
+
583
+
584
+ for(input_reg_id = 0, input_reg_idx = 0; input_reg_idx < kernel->n_input_regs; input_reg_id++) {
585
+ if(!kernel->reg_info.x64[input_reg_id].input) continue;
586
+
587
+ unsigned example_idx;
588
+
589
+ if(set_io_mapping) {
590
+ example_idx = input_reg_idx % in_arity;
591
+ program->reg_inputs.x64[input_reg_id] = (uint8_t) example_idx;
592
+ } else {
593
+ example_idx = program->reg_inputs.x64[input_reg_id];
594
+ }
595
+
596
+ evoasm_example_val *example = &input_vals[example_idx];
597
+ evoasm_x64_params params = {0};
598
+ enum evoasm_x64_reg_type reg_type = evoasm_x64_reg_type(input_reg_id);
599
+
600
+ evoasm_debug("emitting input register initialization of register %d to value %" PRId64, input_reg_id, example->i64);
601
+
602
+ switch(reg_type) {
603
+ case EVOASM_X64_REG_TYPE_GP: {
604
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
605
+ /*FIXME: hard-coded example type */
606
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_arch_param_val) example->i64);
607
+ EVOASM_X64_ENC(mov_r64_imm64);
608
+ evoasm_arch_save(program->arch, program->buf);
609
+ break;
610
+ }
611
+ case EVOASM_X64_REG_TYPE_XMM: {
612
+ /* load address of example into tmp_reg */
613
+ if(loaded_example != example) {
614
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_SEARCH_X64_REG_TMP);
615
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_arch_param_val)(uintptr_t) &example->f64);
616
+ EVOASM_X64_ENC(mov_r64_imm64);
617
+ loaded_example = example;
618
+ }
619
+
620
+ /* load into xmm via address in tmp_reg */
621
+ /*FIXME: hard-coded example type */
622
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
623
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_SEARCH_X64_REG_TMP);
624
+ EVOASM_X64_ENC(movsd_xmm_xmmm64);
625
+ evoasm_arch_save(program->arch, program->buf);
626
+ break;
627
+ }
628
+ default:
629
+ evoasm_fatal("non-gpr register type (%d) (unimplemented)", reg_type);
630
+ evoasm_assert_not_reached();
631
+ }
632
+
633
+ input_reg_idx++;
634
+ }
635
+
636
+ if(program->reset_rflags) {
637
+ EVOASM_TRY(error, evoasm_program_x64_emit_rflags_reset, program);
638
+ }
639
+ return true;
640
+
641
+ error:
642
+ enc_failed:
643
+ return false;
644
+ }
645
+
646
+ static evoasm_success
647
+ evoasm_program_x64_emit_kernel_transition(evoasm_program *program,
648
+ evoasm_kernel *kernel,
649
+ evoasm_kernel *target_kernel,
650
+ evoasm_buf *buf,
651
+ unsigned trans_idx,
652
+ bool set_io_mapping) {
653
+ evoasm_arch *arch = program->arch;
654
+ evoasm_x64 *x64 = (evoasm_x64 *) arch;
655
+ unsigned input_reg_idx;
656
+ evoasm_x64_reg_id input_reg_id;
657
+
658
+ for(input_reg_id = 0, input_reg_idx = 0; input_reg_id < EVOASM_X64_N_REGS; input_reg_id++) {
659
+ if(!target_kernel->reg_info.x64[input_reg_id].input) continue;
660
+
661
+ evoasm_x64_reg_id output_reg_id;
662
+
663
+ if(set_io_mapping) {
664
+ unsigned output_reg_idx = input_reg_idx % kernel->n_output_regs;
665
+ output_reg_id = kernel->output_regs.x64[output_reg_idx];
666
+
667
+ kernel->reg_info.x64[input_reg_id].trans_regs[trans_idx] = output_reg_id;
668
+ } else {
669
+ output_reg_id = kernel->reg_info.x64[input_reg_id].trans_regs[trans_idx];
670
+ }
671
+
672
+ enum evoasm_x64_reg_type output_reg_type = evoasm_x64_reg_type(output_reg_id);
673
+ enum evoasm_x64_reg_type input_reg_type = evoasm_x64_reg_type(input_reg_id);
674
+ evoasm_x64_params params = {0};
675
+
676
+ if(input_reg_id != output_reg_id) {
677
+ if(output_reg_type == EVOASM_X64_REG_TYPE_GP &&
678
+ input_reg_type == EVOASM_X64_REG_TYPE_GP) {
679
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
680
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
681
+ EVOASM_X64_ENC(mov_r64_rm64);
682
+ evoasm_arch_save(program->arch, buf);
683
+ }
684
+ else if(output_reg_type == EVOASM_X64_REG_TYPE_XMM &&
685
+ input_reg_type == EVOASM_X64_REG_TYPE_XMM) {
686
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
687
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
688
+ if(x64->features & EVOASM_X64_FEATURE_AVX) {
689
+ EVOASM_X64_ENC(vmovdqa_ymm_ymmm256);
690
+ }
691
+ else {
692
+ EVOASM_X64_ENC(movdqa_xmm_xmmm128);
693
+ }
694
+ evoasm_arch_save(program->arch, buf);
695
+ }
696
+ else if(output_reg_type == EVOASM_X64_REG_TYPE_GP &&
697
+ input_reg_type == EVOASM_X64_REG_TYPE_XMM) {
698
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
699
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
700
+ if(x64->features & EVOASM_X64_FEATURE_AVX) {
701
+ EVOASM_X64_ENC(vmovq_xmm_rm64);
702
+ } else {
703
+ EVOASM_X64_ENC(movq_xmm_rm64);
704
+ }
705
+ evoasm_arch_save(program->arch, buf);
706
+ }
707
+ else if(output_reg_type == EVOASM_X64_REG_TYPE_XMM &&
708
+ input_reg_type == EVOASM_X64_REG_TYPE_GP) {
709
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
710
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
711
+ if(x64->features & EVOASM_X64_FEATURE_AVX) {
712
+ EVOASM_X64_ENC(vmovq_rm64_xmm);
713
+ }
714
+ else {
715
+ EVOASM_X64_ENC(movq_rm64_xmm);
716
+ }
717
+ evoasm_arch_save(program->arch, buf);
718
+ }
719
+ else {
720
+ evoasm_assert_not_reached();
721
+ }
722
+ }
723
+ input_reg_idx++;
724
+ }
725
+
726
+ return true;
727
+
728
+ enc_failed:
729
+ return false;
730
+ }
731
+
732
/* Address of the 32-bit word that ends at the current write position of
 * `buf`, i.e. the rel32 field of a jump that has just been written. */
#define _EVOASM_BUF_PHI_GET(buf) ((uint32_t *)((buf)->data + (buf)->pos - 4))

/* Patches the rel32 field `label` so that the jump lands on address `val`.
 * The displacement is relative to the first byte after the field.
 * No trailing semicolon, so the macro behaves like a single statement. */
#define _EVOASM_BUF_PHI_SET(label, val) \
  do { (*(label) = (uint32_t)((uint8_t *)(val) - ((uint8_t *)(label) + 4))); } while(0)

/* Address of the current write position of `buf`. */
#define _EVOASM_BUF_POS_ADDR(buf) ((buf)->data + (buf)->pos)
736
+
737
+ static evoasm_success
738
+ evoasm_program_x64_emit_kernel_transitions(evoasm_program *program,
739
+ evoasm_kernel *kernel,
740
+ evoasm_kernel *next_kernel,
741
+ evoasm_kernel *branch_kernel,
742
+ evoasm_buf *buf,
743
+ uint32_t **branch_kernel_phi,
744
+ bool set_io_mapping) {
745
+
746
+ evoasm_arch *arch = program->arch;
747
+ evoasm_x64 *x64 = (evoasm_x64 *) arch;
748
+ unsigned jmp_insts_len = 0;
749
+ evoasm_inst_id jmp_insts[32];
750
+ bool jbe = false;
751
+ bool jle = false;
752
+ evoasm_x64_params params = {0};
753
+ uint32_t *branch_phi = NULL;
754
+ uint32_t *counter_phi = NULL;
755
+
756
+ if(program->search_params->recur_limit == 0) goto next_trans;
757
+
758
+ if(kernel->reg_info.x64[EVOASM_X64_REG_OF].written) {
759
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JO_REL32;
760
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JNO_REL32;
761
+ }
762
+
763
+ if(kernel->reg_info.x64[EVOASM_X64_REG_SF].written) {
764
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JS_REL32;
765
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JNS_REL32;
766
+ }
767
+
768
+ if(kernel->reg_info.x64[EVOASM_X64_REG_ZF].written) {
769
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JE_JZ_REL32;
770
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JNS_REL32;
771
+
772
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JBE_JNA_REL32;
773
+ jbe = true;
774
+
775
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JLE_JNG_REL32;
776
+ jle = true;
777
+ }
778
+
779
+ if(kernel->reg_info.x64[EVOASM_X64_REG_CF].written) {
780
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JB_JC_JNAE_REL32;
781
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JAE_JNB_JNC_REL32;
782
+
783
+ if(!jbe) {
784
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JBE_JNA_REL32;
785
+ }
786
+ }
787
+
788
+ if(kernel->reg_info.x64[EVOASM_X64_REG_ZF].written &&
789
+ kernel->reg_info.x64[EVOASM_X64_REG_CF].written) {
790
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JA_JNBE_REL32;
791
+ }
792
+
793
+ if(kernel->reg_info.x64[EVOASM_X64_REG_SF].written &&
794
+ kernel->reg_info.x64[EVOASM_X64_REG_OF].written) {
795
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JL_JNGE_REL32;
796
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JGE_JNL_REL32;
797
+
798
+ if(!jle) {
799
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JLE_JNG_REL32;
800
+ }
801
+
802
+ if(kernel->reg_info.x64[EVOASM_X64_REG_ZF].written) {
803
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JG_JNLE_REL32;
804
+ }
805
+ }
806
+
807
+ if(kernel->reg_info.x64[EVOASM_X64_REG_CF].written) {
808
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JB_JC_JNAE_REL32;
809
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JAE_JNB_JNC_REL32;
810
+ }
811
+
812
+ if(kernel->reg_info.x64[EVOASM_X64_REG_PF].written) {
813
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JP_JPE_REL32;
814
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JNP_JPO_REL32;
815
+ }
816
+
817
+ #if 0
818
+ /*FIXME: only 8bit possible, check and activate if feasable*/
819
+ if(kernel->reg_info.x64[EVOASM_X64_REG_C].written) {
820
+ jmp_insts[jmp_insts_len++] = EVOASM_X64_INST_JECXZ_JRCXZ_REL8;
821
+ }
822
+ #endif
823
+
824
+ if(jmp_insts_len > 0) {
825
+ evoasm_inst_id jmp_inst_id = jmp_insts[kernel->params->jmp_selector % jmp_insts_len];
826
+ evoasm_inst *jmp_inst = (evoasm_inst *) evoasm_x64_get_inst(x64, jmp_inst_id, false);
827
+
828
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
829
+ EVOASM_TRY(error, evoasm_inst_encode, jmp_inst, arch, params.vals, (evoasm_bitmap *) &params.set);
830
+ evoasm_arch_save(arch, buf);
831
+ branch_phi = _EVOASM_BUF_PHI_GET(buf);
832
+ assert(*branch_phi == 0xdeadbeef);
833
+
834
+ if(branch_kernel->idx <= kernel->idx) {
835
+ /* back jump, guard with counter */
836
+
837
+ uint32_t *counter = &program->recur_counters[kernel->idx];
838
+ uintptr_t addr_imm = (uintptr_t) counter;
839
+
840
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_SEARCH_X64_REG_TMP);
841
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_arch_param_val) addr_imm);
842
+ EVOASM_X64_ENC(mov_r64_imm64);
843
+ evoasm_arch_save(arch, buf);
844
+
845
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_SEARCH_X64_REG_TMP);
846
+ EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, program->search_params->recur_limit);
847
+ EVOASM_X64_ENC(cmp_rm32_imm32);
848
+ evoasm_arch_save(arch, buf);
849
+
850
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
851
+ EVOASM_X64_ENC(jge_jnl_rel32);
852
+ evoasm_arch_save(arch, buf);
853
+ counter_phi = _EVOASM_BUF_PHI_GET(buf);
854
+ assert(*counter_phi == 0xdeadbeef);
855
+
856
+ EVOASM_X64_ENC(inc_rm32);
857
+ evoasm_arch_save(arch, buf);
858
+ }
859
+
860
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transition, program,
861
+ kernel, branch_kernel, buf, 1, set_io_mapping);
862
+
863
+ EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
864
+ EVOASM_X64_ENC(jmp_rel32);
865
+ evoasm_arch_save(arch, buf);
866
+ *branch_kernel_phi = _EVOASM_BUF_PHI_GET(buf);
867
+ assert(**branch_kernel_phi == 0xdeadbeef);
868
+ }
869
+
870
+ if(branch_phi != NULL) {
871
+ _EVOASM_BUF_PHI_SET(branch_phi, _EVOASM_BUF_POS_ADDR(buf));
872
+ }
873
+
874
+ if(counter_phi != NULL) {
875
+ _EVOASM_BUF_PHI_SET(counter_phi, _EVOASM_BUF_POS_ADDR(buf));
876
+ }
877
+
878
+ next_trans:
879
+
880
+ if(next_kernel != NULL) {
881
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transition, program,
882
+ kernel, next_kernel, buf, 0, set_io_mapping);
883
+ }
884
+
885
+ evoasm_buf_log(buf, EVOASM_LOG_LEVEL_DEBUG);
886
+
887
+ return true;
888
+
889
+ error:
890
+ enc_failed:
891
+ return false;
892
+ }
893
+
894
+
895
+ static evoasm_success
896
+ evoasm_program_x64_emit_kernel(evoasm_program *program, evoasm_kernel *kernel, evoasm_buf *buf) {
897
+ unsigned i;
898
+ evoasm_arch *arch = program->arch;
899
+ evoasm_kernel_params *kernel_params = kernel->params;
900
+
901
+ assert(kernel_params->size > 0);
902
+ for(i = 0; i < kernel_params->size; i++) {
903
+ evoasm_inst *inst = kernel_params->params[i].inst;
904
+ evoasm_x64_inst *x64_inst = (evoasm_x64_inst *) inst;
905
+ program->exception_mask = program->exception_mask | x64_inst->exceptions;
906
+ EVOASM_TRY(error, evoasm_inst_encode,
907
+ inst,
908
+ arch,
909
+ kernel_params->params[i].param_vals,
910
+ (evoasm_bitmap *) &kernel_params->params[i].set_params);
911
+
912
+ evoasm_arch_save(arch, buf);
913
+ }
914
+ return true;
915
+ error:
916
+ return false;
917
+ }
918
+
919
+
920
+ static evoasm_success
921
+ evoasm_program_x64_emit_program_kernels(evoasm_program *program, bool set_io_mapping) {
922
+ unsigned i;
923
+ evoasm_buf *buf = program->body_buf;
924
+ evoasm_program_params *program_params = program->params;
925
+ evoasm_kernel *kernel, *next_kernel, *branch_kernel;
926
+ unsigned size = program_params->size;
927
+ uint32_t *branch_phis[EVOASM_PROGRAM_MAX_SIZE] = {0};
928
+ uint8_t *kernel_addrs[EVOASM_PROGRAM_MAX_SIZE];
929
+
930
+ evoasm_buf_reset(buf);
931
+
932
+ assert(size > 0);
933
+
934
+ for(i = 0; i < size; i++) {
935
+ kernel = &program->kernels[i];
936
+
937
+ kernel_addrs[i] = buf->data + buf->pos;
938
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel, program, kernel, buf);
939
+
940
+ if(i < size - 1) {
941
+ next_kernel = &program->kernels[i + 1];
942
+ } else {
943
+ next_kernel = NULL;
944
+ }
945
+
946
+ assert(kernel->params->branch_kernel_idx < program->params->size);
947
+ branch_kernel = &program->kernels[kernel->params->branch_kernel_idx];
948
+
949
+ EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transitions, program, kernel,
950
+ next_kernel, branch_kernel, buf, &branch_phis[i], set_io_mapping);
951
+ }
952
+
953
+ for(i = 0; i < size; i++) {
954
+ uint32_t *branch_phi = branch_phis[i];
955
+ if(branch_phi != NULL) {
956
+ kernel = &program->kernels[i];
957
+ uint8_t *branch_kernel_addr = kernel_addrs[kernel->params->branch_kernel_idx];
958
+ assert(*branch_phi == 0xdeadbeef);
959
+ _EVOASM_BUF_PHI_SET(branch_phi, branch_kernel_addr);
960
+ }
961
+ }
962
+
963
+ return true;
964
+ error:
965
+ return false;
966
+ }
967
+
968
+ static evoasm_success
969
+ evoasm_program_x64_emit_io_load_store(evoasm_program *program,
970
+ evoasm_program_input *input,
971
+ bool io_mapping) {
972
+ unsigned i;
973
+ unsigned n_examples = EVOASM_PROGRAM_INPUT_N(input);
974
+
975
+ evoasm_buf_reset(program->buf);
976
+ EVOASM_TRY(error, evoasm_x64_func_prolog, (evoasm_x64 *) program->arch, program->buf, EVOASM_X64_ABI_SYSV);
977
+
978
+ for(i = 0; i < n_examples; i++) {
979
+ evoasm_example_val *input_vals = input->vals + i * input->arity;
980
+ EVOASM_TRY(error, evoasm_program_x64_emit_input_load, program, input_vals, input->types, input->arity, io_mapping);
981
+ {
982
+ size_t r = evoasm_buf_append(program->buf, program->body_buf);
983
+ assert(r == 0);
984
+ }
985
+ EVOASM_TRY(error, evoasm_program_x64_emit_output_store, program, i);
986
+ }
987
+
988
+ EVOASM_TRY(error, evoasm_x64_func_epilog, (evoasm_x64 *) program->arch, program->buf, EVOASM_X64_ABI_SYSV);
989
+ return true;
990
+
991
+ error:
992
+ return false;
993
+ }
994
+
995
+ static evoasm_success
996
+ evoasm_program_x64_emit(evoasm_program *program,
997
+ evoasm_program_input *input,
998
+ bool prepare, bool emit_kernels, bool emit_io_load_store, bool set_io_mapping) {
999
+
1000
+ if(prepare) {
1001
+ evoasm_program_x64_prepare(program);
1002
+ }
1003
+
1004
+ if(emit_kernels) {
1005
+ EVOASM_TRY(error, evoasm_program_x64_emit_program_kernels, program, set_io_mapping);
1006
+ }
1007
+
1008
+ if(emit_io_load_store) {
1009
+ EVOASM_TRY(error, evoasm_program_x64_emit_io_load_store, program, input, set_io_mapping);
1010
+ }
1011
+
1012
+ evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_DEBUG);
1013
+
1014
+
1015
+ return true;
1016
+
1017
+ error:
1018
+ return false;
1019
+ }
1020
+
1021
+ static evoasm_success
1022
+ evoasm_program_emit(evoasm_program *program,
1023
+ evoasm_program_input *input,
1024
+ bool prepare, bool emit_kernels, bool emit_io_load_store, bool set_io_mapping) {
1025
+ evoasm_arch *arch = program->arch;
1026
+
1027
+ switch(arch->cls->id) {
1028
+ case EVOASM_ARCH_X64: {
1029
+ return evoasm_program_x64_emit(program, input,
1030
+ prepare, emit_kernels, emit_io_load_store, set_io_mapping);
1031
+ break;
1032
+ }
1033
+ default:
1034
+ evoasm_assert_not_reached();
1035
+ }
1036
+ }
1037
+
1038
/* Distance metrics selectable when updating the distance matrix. */
typedef enum {
  EVOASM_METRIC_ABSDIFF = 0, /* absolute difference of the two values */
  EVOASM_N_METRICS           /* number of metrics; keep last */
} evoasm_metric;
1042
+
1043
+ static inline void
1044
+ evoasm_program_update_dist_mat(evoasm_program *program,
1045
+ evoasm_kernel *kernel,
1046
+ evoasm_program_output *output,
1047
+ unsigned height,
1048
+ unsigned example_index,
1049
+ double *dist_mat,
1050
+ evoasm_metric metric) {
1051
+ unsigned i, j;
1052
+ unsigned width = kernel->n_output_regs;
1053
+ evoasm_example_val *example_vals = output->vals + example_index * output->arity;
1054
+
1055
+ for(i = 0; i < height; i++) {
1056
+ evoasm_example_val example_val = example_vals[i];
1057
+ evoasm_example_type example_type = output->types[i];
1058
+ double example_val_dbl = evoasm_example_val_to_dbl(example_val, example_type);
1059
+
1060
+ for(j = 0; j < width; j++) {
1061
+ evoasm_example_val output_val = program->output_vals[example_index * width + j];
1062
+ //uint8_t output_size = program->output_sizes[j];
1063
+ //switch(output_size) {
1064
+ //
1065
+ //}
1066
+ // FIXME: output is essentially just a bitstring and could be anything
1067
+ // an integer (both, signed or unsigned) a float or double.
1068
+ // Moreover, a portion of the output value could
1069
+ // hold the correct answer (e.g. lower 8 or 16 bits etc.).
1070
+ // For now we use the example output type and assume signedness.
1071
+ // This needs to be fixed.
1072
+ double output_val_dbl = evoasm_example_val_to_dbl(output_val, example_type);
1073
+
1074
+ switch(metric) {
1075
+ default:
1076
+ case EVOASM_METRIC_ABSDIFF: {
1077
+ double dist = fabs(output_val_dbl - example_val_dbl);
1078
+ dist_mat[i * width + j] += dist;
1079
+ break;
1080
+ }
1081
+ }
1082
+ }
1083
+ }
1084
+ }
1085
+
1086
+ static void
1087
+ evoasm_program_log_program_output(evoasm_program *program,
1088
+ evoasm_kernel *kernel,
1089
+ evoasm_program_output *output,
1090
+ uint_fast8_t * const matching,
1091
+ evoasm_log_level log_level) {
1092
+
1093
+ unsigned n_examples = EVOASM_PROGRAM_OUTPUT_N(output);
1094
+ unsigned height = output->arity;
1095
+ unsigned width = kernel->n_output_regs;
1096
+ unsigned i, j, k;
1097
+
1098
+ evoasm_log(log_level, EVOASM_LOG_TAG, "OUTPUT MATRICES:\n");
1099
+
1100
+ for(i = 0; i < n_examples; i++) {
1101
+ for(j = 0; j < height; j++) {
1102
+ for(k = 0; k < width; k++) {
1103
+ bool matched = matching[j] == k;
1104
+ evoasm_example_val val = program->output_vals[i * width + k];
1105
+ if(matched) {
1106
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[1m ");
1107
+ }
1108
+ evoasm_log(log_level, EVOASM_LOG_TAG, " %ld (%f)\t ", val.i64, val.f64);
1109
+ if(matched) {
1110
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[0m ");
1111
+ }
1112
+ }
1113
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
1114
+ }
1115
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
1116
+ }
1117
+ }
1118
+
1119
+ static void
1120
+ evoasm_program_log_dist_dist_mat(evoasm_program *program,
1121
+ evoasm_kernel *kernel,
1122
+ unsigned height,
1123
+ double *dist_mat,
1124
+ uint_fast8_t *matching,
1125
+ evoasm_log_level log_level) {
1126
+
1127
+ unsigned width = kernel->n_output_regs;
1128
+ unsigned i, j;
1129
+
1130
+ evoasm_log(log_level, EVOASM_LOG_TAG, "DIST MATRIX: (%d, %d)\n", height, width);
1131
+ for(i = 0; i < height; i++) {
1132
+ for(j = 0; j < width; j++) {
1133
+ if(matching[i] == j) {
1134
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[1m ");
1135
+ }
1136
+ evoasm_log(log_level, EVOASM_LOG_TAG, " %.2g\t ", dist_mat[i * width + j]);
1137
+ if(matching[i] == j) {
1138
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[0m ");
1139
+ }
1140
+ }
1141
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
1142
+ }
1143
+ evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
1144
+ }
1145
+
1146
+
1147
+ static inline bool
1148
+ evoasm_program_match(evoasm_program *program,
1149
+ unsigned width,
1150
+ double *dist_mat,
1151
+ uint_fast8_t *matching) {
1152
+
1153
+ uint_fast8_t best_index = UINT_FAST8_MAX;
1154
+ double best_dist = INFINITY;
1155
+ uint_fast8_t i;
1156
+
1157
+ for(i = 0; i < width; i++) {
1158
+ double v = dist_mat[i];
1159
+ if(v < best_dist) {
1160
+ best_dist = v;
1161
+ best_index = i;
1162
+ }
1163
+ }
1164
+
1165
+ if(EVOASM_LIKELY(best_index != UINT_FAST8_MAX)) {
1166
+ *matching = best_index;
1167
+ return true;
1168
+ } else {
1169
+ /*evoasm_program_log_dist_dist_mat(program,
1170
+ 1,
1171
+ dist_mat,
1172
+ matching,
1173
+ EVOASM_LOG_LEVEL_WARN);
1174
+ evoasm_assert_not_reached();*/
1175
+ /*
1176
+ * Might happen if all elements are inf or nan
1177
+ */
1178
+ return false;
1179
+ }
1180
+ }
1181
+
1182
+ static inline void
1183
+ evoasm_program_calc_stable_matching(evoasm_program *program,
1184
+ evoasm_kernel *kernel,
1185
+ unsigned height,
1186
+ double *dist_mat,
1187
+ uint_fast8_t *matching) {
1188
+
1189
+ uint_fast8_t width = (uint_fast8_t) kernel->n_output_regs;
1190
+ uint_fast8_t *inv_matching = evoasm_alloca(width * sizeof(uint_fast8_t));
1191
+ uint_fast8_t i;
1192
+
1193
+ // calculates a stable matching
1194
+ for(i = 0; i < height; i++) {
1195
+ matching[i] = UINT_FAST8_MAX;
1196
+ }
1197
+
1198
+ for(i = 0; i < width; i++) {
1199
+ inv_matching[i] = UINT_FAST8_MAX;
1200
+ }
1201
+
1202
+ while(true) {
1203
+ uint_fast8_t unmatched_index = UINT_FAST8_MAX;
1204
+ uint_fast8_t best_index = UINT_FAST8_MAX;
1205
+ double best_dist = INFINITY;
1206
+
1207
+ for(i = 0; i < height; i++) {
1208
+ if(matching[i] == UINT_FAST8_MAX) {
1209
+ unmatched_index = i;
1210
+ break;
1211
+ }
1212
+ }
1213
+
1214
+ if(unmatched_index == UINT_FAST8_MAX) {
1215
+ break;
1216
+ }
1217
+
1218
+ for(i = 0; i < width; i++) {
1219
+ double v = dist_mat[unmatched_index * width + i];
1220
+ if(v < best_dist) {
1221
+ best_dist = v;
1222
+ best_index = i;
1223
+ }
1224
+ }
1225
+
1226
+ if(EVOASM_LIKELY(best_index != UINT_FAST8_MAX)) {
1227
+ if(inv_matching[best_index] == UINT_FAST8_MAX) {
1228
+ inv_matching[best_index] = unmatched_index;
1229
+ matching[unmatched_index] = best_index;
1230
+ }
1231
+ else {
1232
+ if(dist_mat[inv_matching[best_index] * width + best_index] > best_dist) {
1233
+ matching[inv_matching[best_index]] = UINT_FAST8_MAX;
1234
+ inv_matching[best_index] = unmatched_index;
1235
+ matching[unmatched_index] = best_index;
1236
+ } else {
1237
+ //dist_mat[unmatched_index * width + i] = copysign(best_dist, -1.0);
1238
+ dist_mat[unmatched_index * width + i] = INFINITY;
1239
+ }
1240
+ }
1241
+ }
1242
+ else {
1243
+ evoasm_program_log_dist_dist_mat(program,
1244
+ kernel,
1245
+ height,
1246
+ dist_mat,
1247
+ matching,
1248
+ EVOASM_LOG_LEVEL_DEBUG);
1249
+ evoasm_assert_not_reached();
1250
+ }
1251
+ }
1252
+ }
1253
+
1254
+
1255
+ static inline evoasm_loss
1256
+ evoasm_program_calc_loss(evoasm_program *program,
1257
+ evoasm_kernel *kernel,
1258
+ unsigned height,
1259
+ double *dist_mat,
1260
+ uint_fast8_t *matching) {
1261
+ unsigned i;
1262
+ unsigned width = kernel->n_output_regs;
1263
+ double scale = 1.0 / width;
1264
+ evoasm_loss loss = 0.0;
1265
+
1266
+ for(i = 0; i < height; i++) {
1267
+ loss += scale * dist_mat[i * width + matching[i]];
1268
+ }
1269
+
1270
+ return loss;
1271
+ }
1272
+
1273
+ static evoasm_loss
1274
+ evoasm_program_assess(evoasm_program *program,
1275
+ evoasm_program_output *output) {
1276
+
1277
+ unsigned i;
1278
+ unsigned n_examples = EVOASM_PROGRAM_OUTPUT_N(output);
1279
+ unsigned height = output->arity;
1280
+ evoasm_kernel *kernel = &program->kernels[program->params->size - 1];
1281
+ unsigned width = kernel->n_output_regs;
1282
+ size_t dist_mat_len = (size_t)(width * height);
1283
+ double *dist_mat = evoasm_alloca(dist_mat_len * sizeof(double));
1284
+ uint_fast8_t *matching = evoasm_alloca(height * sizeof(uint_fast8_t));
1285
+ evoasm_loss loss;
1286
+
1287
+ for(i = 0; i < dist_mat_len; i++) {
1288
+ dist_mat[i] = 0.0;
1289
+ }
1290
+
1291
+ if(height == 1) {
1292
+ /* COMMON FAST-PATH */
1293
+ for(i = 0; i < n_examples; i++) {
1294
+ evoasm_program_update_dist_mat(program, kernel, output, 1, i, dist_mat, EVOASM_METRIC_ABSDIFF);
1295
+ }
1296
+
1297
+ if(evoasm_program_match(program, width, dist_mat, matching)) {
1298
+ loss = evoasm_program_calc_loss(program, kernel, 1, dist_mat, matching);
1299
+ } else {
1300
+ loss = INFINITY;
1301
+ }
1302
+ }
1303
+ else {
1304
+ for(i = 0; i < n_examples; i++) {
1305
+ evoasm_program_update_dist_mat(program, kernel, output, height, i, dist_mat, EVOASM_METRIC_ABSDIFF);
1306
+ }
1307
+
1308
+ evoasm_program_calc_stable_matching(program, kernel, height, dist_mat, matching);
1309
+ loss = evoasm_program_calc_loss(program, kernel, height, dist_mat, matching);
1310
+ }
1311
+
1312
+
1313
+
1314
+ #if EVOASM_MIN_LOG_LEVEL <= EVOASM_LOG_LEVEL_DEBUG
1315
+ if(loss == 0.0) {
1316
+ evoasm_program_log_program_output(program,
1317
+ kernel,
1318
+ output,
1319
+ matching,
1320
+ EVOASM_LOG_LEVEL_DEBUG);
1321
+ }
1322
+ #endif
1323
+
1324
+ for(i = 0; i < height; i++) {
1325
+ switch(program->arch->cls->id) {
1326
+ case EVOASM_ARCH_X64: {
1327
+ program->output_regs[i] = kernel->output_regs.x64[matching[i]];
1328
+ break;
1329
+ }
1330
+ default:
1331
+ evoasm_assert_not_reached();
1332
+ }
1333
+ }
1334
+
1335
+ return loss;
1336
+ }
1337
+
1338
+ static void
1339
+ evoasm_program_load_output(evoasm_program *program,
1340
+ evoasm_kernel *kernel,
1341
+ evoasm_program_input *input,
1342
+ evoasm_program_output *output,
1343
+ evoasm_program_output *loaded_output) {
1344
+
1345
+ unsigned i, j;
1346
+ unsigned width = kernel->n_output_regs;
1347
+ unsigned height = output->arity;
1348
+ unsigned n_examples = EVOASM_PROGRAM_INPUT_N(input);
1349
+ uint_fast8_t *matching = evoasm_alloca(height * sizeof(uint_fast8_t));
1350
+
1351
+ loaded_output->len = (uint16_t)(EVOASM_PROGRAM_INPUT_N(input) * height);
1352
+ loaded_output->vals = evoasm_malloc((size_t) loaded_output->len * sizeof(evoasm_example_val));
1353
+
1354
+ for(i = 0; i < height; i++) {
1355
+ for(j = 0; j < kernel->n_output_regs; j++) {
1356
+ if(program->output_regs[i] == kernel->output_regs.x64[j]) {
1357
+ matching[i] = (uint_fast8_t) j;
1358
+ goto next;
1359
+ }
1360
+ }
1361
+ evoasm_fatal("program output reg %d not found in kernel output regs", program->output_regs[i]);
1362
+ evoasm_assert_not_reached();
1363
+ next:;
1364
+ }
1365
+
1366
+ for(i = 0; i < n_examples; i++) {
1367
+ for(j = 0; j < height; j++) {
1368
+ loaded_output->vals[i * height + j] = program->output_vals[i * width + matching[j]];
1369
+ }
1370
+ }
1371
+
1372
+ loaded_output->arity = output->arity;
1373
+ memcpy(loaded_output->types, output->types, EVOASM_ARY_LEN(output->types));
1374
+
1375
+ //#if EVOASM_MIN_LOG_LEVEL <= EVOASM_LOG_LEVEL_INFO
1376
+
1377
+ evoasm_program_log_program_output(program,
1378
+ kernel,
1379
+ loaded_output,
1380
+ matching,
1381
+ EVOASM_LOG_LEVEL_WARN);
1382
+ //#endif
1383
+ }
1384
+
1385
+ void
1386
+ evoasm_program_io_destroy(evoasm_program_io *program_io) {
1387
+ evoasm_free(program_io->vals);
1388
+ }
1389
+
1390
+ evoasm_success
1391
+ evoasm_program_run(evoasm_program *program,
1392
+ evoasm_program_input *input,
1393
+ evoasm_program_output *output) {
1394
+ bool retval;
1395
+ struct evoasm_signal_context signal_ctx = {0};
1396
+ unsigned i;
1397
+ evoasm_kernel *kernel = &program->kernels[program->params->size - 1];
1398
+
1399
+ if(input->arity != program->_input.arity) {
1400
+ evoasm_set_error(EVOASM_ERROR_TYPE_ARGUMENT, EVOASM_ERROR_CODE_NONE, NULL,
1401
+ "example arity mismatch (%d for %d)", input->arity, program->_input.arity);
1402
+ return false;
1403
+ }
1404
+
1405
+ for(i = 0; i < input->arity; i++) {
1406
+ if(input->types[i] != program->_input.types[i]) {
1407
+ evoasm_set_error(EVOASM_ERROR_TYPE_ARGUMENT, EVOASM_ERROR_CODE_NONE, NULL,
1408
+ "example type mismatch (%d != %d)", input->types[i], program->_input.types[i]);
1409
+ return false;
1410
+ }
1411
+ }
1412
+
1413
+ program->output_vals = evoasm_alloca(EVOASM_PROGRAM_OUTPUT_VALS_SIZE(input));
1414
+ signal_ctx.exception_mask = program->exception_mask;
1415
+ program->_signal_ctx = &signal_ctx;
1416
+
1417
+ if(!evoasm_program_emit(program, input, false, false, true, false)) {
1418
+ return false;
1419
+ }
1420
+
1421
+ // FIXME:
1422
+ if(kernel->n_output_regs == 0) {
1423
+ return true;
1424
+ }
1425
+
1426
+ evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_DEBUG);
1427
+ evoasm_signal_context_install(&signal_ctx, program->arch);
1428
+
1429
+ if(!evoasm_buf_protect(program->buf, EVOASM_MPROT_RX)) {
1430
+ evoasm_assert_not_reached();
1431
+ }
1432
+
1433
+ if(_EVOASM_SIGNAL_CONTEXT_TRY(&signal_ctx)) {
1434
+ evoasm_buf_exec(program->buf);
1435
+ evoasm_program_load_output(program,
1436
+ kernel,
1437
+ input,
1438
+ &program->_output,
1439
+ output);
1440
+ retval = true;
1441
+ } else {
1442
+ evoasm_debug("signaled\n");
1443
+ retval = false;
1444
+ }
1445
+
1446
+ if(!evoasm_buf_protect(program->buf, EVOASM_MPROT_RW)) {
1447
+ evoasm_assert_not_reached();
1448
+ }
1449
+
1450
+ evoasm_signal_context_uninstall(&signal_ctx);
1451
+
1452
+ program->_signal_ctx = NULL;
1453
+ program->output_vals = NULL;
1454
+
1455
+ return retval;
1456
+ }
1457
+
1458
+ static evoasm_success
1459
+ evoasm_search_eval_program(evoasm_search *search,
1460
+ evoasm_program *program,
1461
+ evoasm_loss *loss) {
1462
+
1463
+ evoasm_kernel *kernel = &program->kernels[program->params->size - 1];
1464
+
1465
+ if(!evoasm_program_emit(program, &search->params.program_input, true, true, true, true)) {
1466
+ *loss = INFINITY;
1467
+ return false;
1468
+ }
1469
+
1470
+ if(EVOASM_UNLIKELY(kernel->n_output_regs == 0)) {
1471
+ *loss = INFINITY;
1472
+ return true;
1473
+ }
1474
+
1475
+ //evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_INFO);
1476
+ {
1477
+ struct evoasm_signal_context *signal_ctx = (struct evoasm_signal_context *) program->_signal_ctx;
1478
+ signal_ctx->exception_mask = program->exception_mask;
1479
+
1480
+ if(_EVOASM_SIGNAL_CONTEXT_TRY((struct evoasm_signal_context *)program->_signal_ctx)) {
1481
+ evoasm_buf_exec(program->buf);
1482
+ *loss = evoasm_program_assess(program, &search->params.program_output);
1483
+ } else {
1484
+ evoasm_debug("program %d signaled", program->index);
1485
+ *loss = INFINITY;
1486
+ }
1487
+ }
1488
+ return true;
1489
+ }
1490
+
1491
+ static bool
1492
+ evoasm_kernel_param_x64_writes_p(evoasm_kernel_param *param, evoasm_reg_id reg_id, evoasm_x64_reg_modif_acc *reg_modif_acc) {
1493
+ evoasm_x64_inst *x64_inst = (evoasm_x64_inst *) param->inst;
1494
+ unsigned i;
1495
+
1496
+ for(i = 0; i < x64_inst->n_operands; i++) {
1497
+ evoasm_x64_operand *op = &x64_inst->operands[i];
1498
+ evoasm_x64_reg_id op_reg_id = evoasm_op_x64_reg_id(op, param);
1499
+
1500
+ if(op->acc_w && op_reg_id == reg_id && evoasm_x64_reg_modif_acc_uncovered_access(reg_modif_acc, op, param)) {
1501
+ evoasm_x64_reg_modif_acc_update(reg_modif_acc, op, param);
1502
+ return true;
1503
+ }
1504
+ }
1505
+ return false;
1506
+ }
1507
+
1508
+ static unsigned
1509
+ evoasm_program_x64_find_writers_(evoasm_program *program, evoasm_kernel *kernel, evoasm_reg_id reg_id,
1510
+ unsigned index, unsigned *writers) {
1511
+ unsigned len = 0;
1512
+ unsigned i, j;
1513
+
1514
+ for(i = 0; i <= index; i++) {
1515
+ j = index - i;
1516
+
1517
+ evoasm_kernel_param *param = &kernel->params->params[j];
1518
+ evoasm_x64_reg_modif_acc reg_modif_acc = {0};
1519
+
1520
+ if(evoasm_kernel_param_x64_writes_p(param, reg_id, &reg_modif_acc)) {
1521
+ writers[len++] = j;
1522
+ }
1523
+ }
1524
+ return len;
1525
+ }
1526
+
1527
+ static unsigned
1528
+ evoasm_program_x64_find_writers(evoasm_program *program, evoasm_kernel *kernel,
1529
+ evoasm_reg_id reg_id, unsigned index, unsigned *writers) {
1530
+
1531
+ return evoasm_program_x64_find_writers_(program, kernel, reg_id, index, writers);
1532
+ }
1533
+
1534
+
1535
+ typedef evoasm_bitmap1024 evoasm_mark_bitmap;
1536
+
1537
+ typedef struct {
1538
+ bool change;
1539
+ evoasm_bitmap512 inst_bitmaps[EVOASM_PROGRAM_MAX_SIZE];
1540
+ evoasm_bitmap256 output_reg_bitmaps[EVOASM_PROGRAM_MAX_SIZE];
1541
+ } _evoasm_program_intron_elimination_ctx;
1542
+
1543
+ static void
1544
+ evoasm_program_x64_mark_writers(evoasm_program *program, evoasm_kernel *kernel,
1545
+ evoasm_reg_id reg_id, unsigned index, _evoasm_program_intron_elimination_ctx *ctx) {
1546
+ unsigned i, j, k, l;
1547
+ unsigned writers[16];
1548
+
1549
+ unsigned writers_len = evoasm_program_x64_find_writers(program, kernel, reg_id, index, writers);
1550
+
1551
+ fprintf(stderr, "found %d writers\n", writers_len);
1552
+
1553
+ if(writers_len > 0) {
1554
+ for(i = 0; i < writers_len; i++) {
1555
+ unsigned writer_idx = writers[i];
1556
+ evoasm_bitmap *inst_bitmap = (evoasm_bitmap *) &ctx->inst_bitmaps[kernel->idx];
1557
+ if(evoasm_bitmap_get(inst_bitmap, writer_idx)) continue;
1558
+
1559
+ fprintf(stderr, "marking writer %d\n", writer_idx);
1560
+ evoasm_kernel_param *param = &kernel->params->params[writer_idx];
1561
+ evoasm_x64_inst *x64_inst = (evoasm_x64_inst *) param->inst;
1562
+ evoasm_bitmap_set(inst_bitmap, writer_idx);
1563
+ ctx->change = true;
1564
+
1565
+ fprintf(stderr, "checking writer operands %d\n", x64_inst->n_operands);
1566
+
1567
+ for(j = 0; j < x64_inst->n_operands; j++) {
1568
+ evoasm_x64_operand *op = &x64_inst->operands[j];
1569
+ evoasm_x64_reg_id op_reg_id = evoasm_op_x64_reg_id(op, param);
1570
+
1571
+ if(op->acc_r) {
1572
+ fprintf(stderr, "found r op\n");
1573
+ if(writer_idx > 0) {
1574
+ evoasm_program_x64_mark_writers(program, kernel, op_reg_id, writer_idx - 1, ctx);
1575
+ }
1576
+
1577
+ if(kernel->reg_info.x64[op_reg_id].input) {
1578
+ fprintf(stderr, "marking input reg %d\n", op_reg_id);
1579
+ unsigned trans_kernels_idcs[] = {(unsigned)(kernel->idx + 1),
1580
+ kernel->params->branch_kernel_idx};
1581
+ for(k = 0; k < EVOASM_ARY_LEN(trans_kernels_idcs); k++) {
1582
+ //evoasm_kernel *trans_kernel = &program->kernels[trans_kernels_idcs[k]];
1583
+ for(l = 0; l < EVOASM_X64_N_REGS; l++) {
1584
+ if(kernel->reg_info.x64[l].trans_regs[k] == op_reg_id) {
1585
+ evoasm_bitmap_set((evoasm_bitmap *) &ctx->output_reg_bitmaps[trans_kernels_idcs[k]], l);
1586
+ }
1587
+ }
1588
+ }
1589
+ } else {
1590
+ fprintf(stderr, "marking reg %d\n", op_reg_id);
1591
+ }
1592
+ }
1593
+ }
1594
+ }
1595
+ }
1596
+ }
1597
+
1598
+ static void
1599
+ evoasm_program_mark_writers(evoasm_program *program, evoasm_kernel *kernel,
1600
+ evoasm_reg_id reg_id, unsigned index, _evoasm_program_intron_elimination_ctx *ctx) {
1601
+ switch(program->arch->cls->id) {
1602
+ case EVOASM_ARCH_X64: {
1603
+ evoasm_program_x64_mark_writers(program, kernel, reg_id, index, ctx);
1604
+ break;
1605
+ }
1606
+ default:
1607
+ evoasm_assert_not_reached();
1608
+ }
1609
+ }
1610
+
1611
+ static evoasm_success
1612
+ evoasm_program_mark_kernel(evoasm_program *program, evoasm_kernel *kernel, _evoasm_program_intron_elimination_ctx *ctx) {
1613
+ unsigned i;
1614
+
1615
+ for(i = 0; i < EVOASM_X64_N_REGS; i++) {
1616
+ evoasm_bitmap *bitmap = (evoasm_bitmap *)&ctx->output_reg_bitmaps[kernel->idx];
1617
+ if(evoasm_bitmap_get(bitmap, i)) {
1618
+ fprintf(stderr, "marking bit %d of %d\n", i, kernel->idx);
1619
+ evoasm_program_mark_writers(program, kernel, (evoasm_reg_id) i, (unsigned)(kernel->params->size - 1), ctx);
1620
+ }
1621
+ }
1622
+
1623
+ return true;
1624
+ }
1625
+
1626
+ evoasm_success
1627
+ evoasm_program_eliminate_introns(evoasm_program *program) {
1628
+ unsigned i, j;
1629
+ unsigned last_kernel_idx = (unsigned) (program->params->size - 1);
1630
+ //evoasm_kernel *last_kernel = &program->kernels[last_kernel_idx];
1631
+
1632
+ _evoasm_program_intron_elimination_ctx ctx = {0};
1633
+
1634
+ {
1635
+ evoasm_bitmap *output_bitmap = (evoasm_bitmap *)&ctx.output_reg_bitmaps[last_kernel_idx];
1636
+ for(i = 0; i < program->_output.arity; i++) {
1637
+ evoasm_bitmap_set(output_bitmap, program->output_regs[i]);
1638
+ }
1639
+ }
1640
+
1641
+ do {
1642
+ i = last_kernel_idx;
1643
+ ctx.change = false;
1644
+ for(i = 0; i <= last_kernel_idx; i++) {
1645
+ j = last_kernel_idx - i;
1646
+ EVOASM_TRY(error, evoasm_program_mark_kernel, program,
1647
+ &program->kernels[j], &ctx);
1648
+ }
1649
+ } while(ctx.change);
1650
+
1651
+ /* sweep */
1652
+ for(i = 0; i <= last_kernel_idx; i++) {
1653
+ evoasm_kernel *kernel = &program->kernels[i];
1654
+ unsigned k;
1655
+ evoasm_bitmap *inst_bitmap = (evoasm_bitmap *) &ctx.inst_bitmaps[i];
1656
+
1657
+ for(k = 0, j = 0; j < kernel->params->size; j++) {
1658
+ if(evoasm_bitmap_get(inst_bitmap, j)) {
1659
+ kernel->params->params[k++] = kernel->params->params[j];
1660
+ }
1661
+ }
1662
+ fprintf(stderr, "kernel %d has now size %d\n", i, k);
1663
+ kernel->params->size = (evoasm_program_size) k;
1664
+ }
1665
+
1666
+
1667
+
1668
+ /* program is already prepared, must be reset before doing it again */
1669
+ evoasm_program_unprepare(program);
1670
+
1671
+ /* reemit, but keep previous mappings */
1672
+ if(!evoasm_program_emit(program, NULL, true, true, false, false)) {
1673
+ return false;
1674
+ }
1675
+
1676
+ return true;
1677
+ error:
1678
+ return false;
1679
+ }
1680
+
1681
+ static evoasm_success
1682
+ evoasm_search_eval_population(evoasm_search *search, unsigned char *programs,
1683
+ evoasm_loss max_loss, evoasm_search_result_func result_func,
1684
+ void *user_data) {
1685
+ unsigned i, j;
1686
+ struct evoasm_signal_context signal_ctx = {0};
1687
+ evoasm_population *pop = &search->pop;
1688
+ bool retval;
1689
+ unsigned n_examples = EVOASM_PROGRAM_INPUT_N(&search->params.program_input);
1690
+
1691
+ evoasm_signal_context_install(&signal_ctx, search->arch);
1692
+
1693
+ for(i = 0; i < search->params.pop_size; i++) {
1694
+ evoasm_loss loss;
1695
+ evoasm_program_params *program_params = _EVOASM_SEARCH_PROGRAM_PARAMS(search, programs, i);
1696
+
1697
+ /* encode solution */
1698
+ evoasm_program program = {
1699
+ .params = program_params,
1700
+ .index = i,
1701
+ .search_params = &search->params,
1702
+ .buf = &search->pop.buf,
1703
+ .body_buf = &search->pop.body_buf,
1704
+ .arch = search->arch,
1705
+ ._signal_ctx = &signal_ctx
1706
+ };
1707
+
1708
+ program.output_vals = pop->output_vals;
1709
+
1710
+ for(j = 0; j < program_params->size; j++) {
1711
+ evoasm_kernel *kernel = &program.kernels[j];
1712
+ kernel->params = _EVOASM_PROGRAM_PARAMS_KERNEL_PARAMS(program_params, search->params.max_kernel_size, j);
1713
+ kernel->idx = (evoasm_program_size) j;
1714
+ }
1715
+
1716
+ if(!evoasm_search_eval_program(search, &program, &loss)) {
1717
+ retval = false;
1718
+ goto done;
1719
+ }
1720
+
1721
+ pop->losses[i] = loss;
1722
+
1723
+ evoasm_debug("program %d has loss %lf", i, loss);
1724
+
1725
+ if(loss <= pop->best_loss) {
1726
+ pop->elite[pop->elite_pos++ % EVOASM_SEARCH_ELITE_SIZE] = i;
1727
+ pop->best_loss = loss;
1728
+ evoasm_debug("program %d has best loss %lf", i, loss);
1729
+ }
1730
+
1731
+ if(EVOASM_UNLIKELY(loss / n_examples <= max_loss)) {
1732
+ evoasm_info("program %d has best loss %lf", i, loss);
1733
+ program._output = search->params.program_output;
1734
+ program._input = search->params.program_input;
1735
+
1736
+ if(!result_func(&program, loss, user_data)) {
1737
+ retval = false;
1738
+ goto done;
1739
+ }
1740
+ }
1741
+ }
1742
+
1743
+ retval = true;
1744
+ done:
1745
+ evoasm_signal_context_uninstall(&signal_ctx);
1746
+ return retval;
1747
+ }
1748
+
1749
+ static void
1750
+ evoasm_search_select_parents(evoasm_search *search, uint32_t *parents) {
1751
+ uint32_t n = 0;
1752
+ unsigned i, j, k;
1753
+
1754
+ /* find out degree elite array is really filled */
1755
+ for(i = 0; i < EVOASM_SEARCH_ELITE_SIZE; i++) {
1756
+ if(search->pop.elite[i] == UINT32_MAX) {
1757
+ break;
1758
+ }
1759
+ }
1760
+
1761
+ /* fill possible free slots */
1762
+ for(j = i, k = 0; j < EVOASM_SEARCH_ELITE_SIZE; j++) {
1763
+ search->pop.elite[j] = search->pop.elite[k++ % i];
1764
+ }
1765
+
1766
+ j = 0;
1767
+ while(true) {
1768
+ for(i = 0; i < search->params.pop_size; i++) {
1769
+ uint32_t r = evoasm_prng32_rand(&search->pop.prng32);
1770
+ if(n >= search->params.pop_size) goto done;
1771
+ if(r < UINT32_MAX * ((search->pop.best_loss + 1.0) / (search->pop.losses[i] + 1.0))) {
1772
+ parents[n++] = i;
1773
+ //evoasm_info("selecting loss %f", search->pop.losses[i]);
1774
+ } else if(r < UINT32_MAX / 32) {
1775
+ parents[n++] = search->pop.elite[j++ % EVOASM_SEARCH_ELITE_SIZE];
1776
+ //evoasm_info("selecting elite loss %f", search->pop.losses[parents[n - 1]]);
1777
+ } else {
1778
+ //evoasm_info("discarding loss %f", search->pop.losses[i]);
1779
+ }
1780
+ }
1781
+ }
1782
+ done:;
1783
+ }
1784
+
1785
+ static void
1786
+ evoasm_search_mutate_kernel(evoasm_search *search, evoasm_kernel_params *child) {
1787
+ uint32_t r = evoasm_prng32_rand(&search->pop.prng32);
1788
+ evoasm_debug("mutating child: %u < %u", r, search->params.mutation_rate);
1789
+ if(r < search->params.mutation_rate) {
1790
+
1791
+ r = evoasm_prng32_rand(&search->pop.prng32);
1792
+ if(child->size > search->params.min_kernel_size && r < UINT32_MAX / 16) {
1793
+ uint32_t index = r % child->size;
1794
+
1795
+ if(index < (uint32_t) (child->size - 1)) {
1796
+ memmove(child->params + index, child->params + index + 1, (child->size - index - 1) * sizeof(evoasm_kernel_param));
1797
+ }
1798
+ child->size--;
1799
+ }
1800
+
1801
+ r = evoasm_prng32_rand(&search->pop.prng32);
1802
+ {
1803
+ evoasm_kernel_param *program_param = child->params + (r % child->size);
1804
+ evoasm_search_seed_kernel_param(search, program_param);
1805
+ }
1806
+ }
1807
+ }
1808
+
1809
+ static void
1810
+ evoasm_search_crossover_kernel(evoasm_search *search, evoasm_kernel_params *parent_a, evoasm_kernel_params *parent_b,
1811
+ evoasm_kernel_params *child) {
1812
+
1813
+ /* NOTE: parent_a must be the longer parent, i.e. parent_size_a >= parent_size_b */
1814
+ evoasm_kernel_size child_size;
1815
+ unsigned crossover_point, crossover_len, i;
1816
+
1817
+ assert(parent_a->size >= parent_b->size);
1818
+
1819
+ child_size = (evoasm_kernel_size)
1820
+ evoasm_prng32_rand_between(&search->pop.prng32,
1821
+ parent_b->size, parent_a->size);
1822
+
1823
+ assert(child_size > 0);
1824
+ assert(child_size >= parent_b->size);
1825
+
1826
+ /* offset for shorter parent */
1827
+ crossover_point = (unsigned) evoasm_prng32_rand_between(&search->pop.prng32,
1828
+ 0, child_size - parent_b->size);
1829
+ crossover_len = (unsigned) evoasm_prng32_rand_between(&search->pop.prng32,
1830
+ 0, parent_b->size);
1831
+
1832
+
1833
+ for(i = 0; i < child_size; i++) {
1834
+ unsigned index;
1835
+ evoasm_kernel_params *parent;
1836
+
1837
+ if(i < crossover_point || i >= crossover_point + crossover_len) {
1838
+ parent = parent_a;
1839
+ index = i;
1840
+ } else {
1841
+ parent = parent_b;
1842
+ index = i - crossover_point;
1843
+ }
1844
+ child->params[i] = parent->params[index];
1845
+ }
1846
+ child->size = child_size;
1847
+
1848
+ evoasm_search_mutate_kernel(search, child);
1849
+ }
1850
+
1851
+
1852
+ static void
1853
+ evoasm_search_crossover_program(evoasm_search *search, evoasm_program_params *parent_a, evoasm_program_params *parent_b,
1854
+ evoasm_program_params *child) {
1855
+
1856
+ /* NOTE: parent_a must be the longer parent, i.e. parent_size_a >= parent_size_b */
1857
+ evoasm_program_size child_size;
1858
+ unsigned i, max_kernel_size;
1859
+
1860
+
1861
+ assert(parent_a->size >= parent_b->size);
1862
+ assert(parent_a->size > 0);
1863
+ assert(parent_b->size > 0);
1864
+
1865
+ child_size = (evoasm_program_size)
1866
+ evoasm_prng32_rand_between(&search->pop.prng32,
1867
+ parent_b->size, parent_a->size);
1868
+
1869
+ assert(child_size > 0);
1870
+ assert(child_size >= parent_b->size);
1871
+
1872
+ max_kernel_size = search->params.max_kernel_size;
1873
+
1874
+ for(i = 0; i < child_size; i++) {
1875
+ evoasm_kernel_params *kernel_child = _EVOASM_PROGRAM_PARAMS_KERNEL_PARAMS(child, max_kernel_size, i);
1876
+
1877
+ if(i < parent_b->size) {
1878
+ evoasm_kernel_params *kernel_parent_a = _EVOASM_PROGRAM_PARAMS_KERNEL_PARAMS(parent_a, max_kernel_size, i);
1879
+ evoasm_kernel_params *kernel_parent_b = _EVOASM_PROGRAM_PARAMS_KERNEL_PARAMS(parent_b, max_kernel_size, i);
1880
+
1881
+ if(kernel_parent_a->size < kernel_parent_b->size) {
1882
+ evoasm_kernel_params *t = kernel_parent_a;
1883
+ kernel_parent_a = kernel_parent_b;
1884
+ kernel_parent_b = t;
1885
+ }
1886
+
1887
+ evoasm_search_crossover_kernel(search, kernel_parent_a, kernel_parent_b, kernel_child);
1888
+ } else {
1889
+ memcpy(kernel_child, parent_a, _EVOASM_KERNEL_SIZE(max_kernel_size));
1890
+ evoasm_search_mutate_kernel(search, kernel_child);
1891
+ }
1892
+ }
1893
+ child->size = child_size;
1894
+ }
1895
+
1896
+ static void
1897
+ evoasm_search_crossover(evoasm_search *search, evoasm_program_params *parent_a, evoasm_program_params *parent_b,
1898
+ evoasm_program_params *child_a, evoasm_program_params *child_b) {
1899
+
1900
+ if(parent_a->size < parent_b->size) {
1901
+ evoasm_program_params *t = parent_a;
1902
+ parent_a = parent_b;
1903
+ parent_b = t;
1904
+ }
1905
+
1906
+ //memcpy(_EVOASM_SEARCH_PROGRAM_PARAMS(search, programs, index), parent_a, _EVOASM_PROGRAM_SIZE(search));
1907
+ //memcpy(_EVOASM_SEARCH_PROGRAM_PARAMS(search, programs, index + 1), parent_a, _EVOASM_PROGRAM_SIZE(search));
1908
+
1909
+ evoasm_search_crossover_program(search, parent_a, parent_b, child_a);
1910
+ if(child_b != NULL) {
1911
+ evoasm_search_crossover_program(search, parent_a, parent_b, child_b);
1912
+ }
1913
+ }
1914
+
1915
+ static void
1916
+ evoasm_search_combine_parents(evoasm_search *search, unsigned char *programs, uint32_t *parents) {
1917
+ unsigned i;
1918
+
1919
+ for(i = 0; i < search->params.pop_size; i += 2) {
1920
+ evoasm_program_params *parent_a = _EVOASM_SEARCH_PROGRAM_PARAMS(search, programs, parents[i]);
1921
+ assert(parent_a->size > 0);
1922
+ evoasm_program_params *parent_b = _EVOASM_SEARCH_PROGRAM_PARAMS(search, programs, parents[i + 1]);
1923
+ evoasm_program_params *child_a = _EVOASM_SEARCH_PROGRAM_PARAMS(search, search->pop.programs_swap, i);
1924
+ evoasm_program_params *child_b = _EVOASM_SEARCH_PROGRAM_PARAMS(search, search->pop.programs_swap, i + 1);
1925
+ evoasm_search_crossover(search, parent_a, parent_b, child_a, child_b);
1926
+
1927
+ assert(child_a->size > 0);
1928
+ assert(child_b->size > 0);
1929
+ }
1930
+ }
1931
+
1932
+ static void
1933
+ evoasm_population_swap(evoasm_population *pop, unsigned char **programs) {
1934
+ unsigned char *programs_tmp;
1935
+
1936
+ programs_tmp = pop->programs_swap;
1937
+ pop->programs_swap = *programs;
1938
+ *programs = programs_tmp;
1939
+ }
1940
+
1941
+ static evoasm_loss
1942
+ evoasm_search_population_loss(evoasm_search *search, unsigned *n_inf) {
1943
+ unsigned i;
1944
+ double scale = 1.0 / search->params.pop_size;
1945
+ double pop_loss = 0.0;
1946
+ *n_inf = 0;
1947
+ for(i = 0; i < search->params.pop_size; i++) {
1948
+ double loss = search->pop.losses[i];
1949
+ if(loss != INFINITY) {
1950
+ pop_loss += scale * loss;
1951
+ }
1952
+ else {
1953
+ (*n_inf)++;
1954
+ }
1955
+ }
1956
+
1957
+ return pop_loss;
1958
+ }
1959
+
1960
+ static void
1961
+ evoasm_search_new_generation(evoasm_search *search, unsigned char **programs) {
1962
+ uint32_t *parents = alloca(search->params.pop_size * sizeof(uint32_t));
1963
+ evoasm_search_select_parents(search, parents);
1964
+
1965
+ #if 0
1966
+ {
1967
+ double scale = 1.0 / search->params.pop_size;
1968
+ double pop_loss = 0.0;
1969
+ unsigned n_inf = 0;
1970
+ for(i = 0; i < search->params.pop_size; i++) {
1971
+ double loss = search->pop.losses[parents[i]];
1972
+ if(loss != INFINITY) {
1973
+ pop_loss += scale * loss;
1974
+ }
1975
+ else {
1976
+ n_inf++;
1977
+ }
1978
+ }
1979
+
1980
+ evoasm_info("population selected loss: %g/%u", pop_loss, n_inf);
1981
+ }
1982
+
1983
+ unsigned i;
1984
+ for(i = 0; i < search->params.pop_size; i++) {
1985
+ evoasm_program_params *program_params = _EVOASM_SEARCH_PROGRAM_PARAMS(search, search->pop.programs, parents[i]);
1986
+ assert(program_params->size > 0);
1987
+ }
1988
+ #endif
1989
+
1990
+ evoasm_search_combine_parents(search, *programs, parents);
1991
+ evoasm_population_swap(&search->pop, programs);
1992
+ }
1993
+
1994
+ #define EVOASM_SEARCH_CONVERGENCE_THRESHOLD 0.03
1995
+
1996
+ static evoasm_success
1997
+ evoasm_search_start_(evoasm_search *search, unsigned char **programs,
1998
+ evoasm_loss max_loss, evoasm_search_result_func result_func,
1999
+ void *user_data) {
2000
+ unsigned gen;
2001
+ evoasm_loss last_loss = 0.0;
2002
+ unsigned ups = 0;
2003
+
2004
+ for(gen = 0;;gen++) {
2005
+ if(!evoasm_search_eval_population(search, *programs, max_loss, result_func, user_data)) {
2006
+ return true;
2007
+ }
2008
+
2009
+ if(gen % 256 == 0) {
2010
+ unsigned n_inf;
2011
+ evoasm_loss loss = evoasm_search_population_loss(search, &n_inf);
2012
+ evoasm_info("population loss: %g/%u\n\n", loss, n_inf);
2013
+
2014
+ if(gen > 0) {
2015
+ if(last_loss <= loss) {
2016
+ ups++;
2017
+ }
2018
+ }
2019
+
2020
+ last_loss = loss;
2021
+
2022
+ if(ups >= 3) {
2023
+ evoasm_info("reached convergence\n");
2024
+ return false;
2025
+ }
2026
+ }
2027
+
2028
+ evoasm_search_new_generation(search, programs);
2029
+ }
2030
+ }
2031
+
2032
+ static void
2033
+ evoasm_search_merge(evoasm_search *search) {
2034
+ unsigned i;
2035
+
2036
+ evoasm_info("merging\n");
2037
+
2038
+ for(i = 0; i < search->params.pop_size; i++) {
2039
+ evoasm_program_params *parent_a = _EVOASM_SEARCH_PROGRAM_PARAMS(search, search->pop.programs_main, i);
2040
+ evoasm_program_params *parent_b = _EVOASM_SEARCH_PROGRAM_PARAMS(search, search->pop.programs_aux, i);
2041
+
2042
+ evoasm_program_params *child = _EVOASM_SEARCH_PROGRAM_PARAMS(search, search->pop.programs_swap, i);
2043
+ evoasm_search_crossover(search, parent_a, parent_b, child, NULL);
2044
+ }
2045
+ evoasm_population_swap(&search->pop, &search->pop.programs_main);
2046
+ }
2047
+
2048
+ void
2049
+ evoasm_search_start(evoasm_search *search, evoasm_loss max_loss, evoasm_search_result_func result_func, void *user_data) {
2050
+
2051
+ unsigned kalpa;
2052
+
2053
+ evoasm_search_seed(search, search->pop.programs_main);
2054
+
2055
+ for(kalpa = 0;;kalpa++) {
2056
+ if(!evoasm_search_start_(search, &search->pop.programs_main, max_loss, result_func, user_data)) {
2057
+ evoasm_search_seed(search, search->pop.programs_aux);
2058
+ evoasm_info("starting aux search");
2059
+ if(!evoasm_search_start_(search, &search->pop.programs_aux, max_loss, result_func, user_data)) {
2060
+ evoasm_search_merge(search);
2061
+ }
2062
+ else {
2063
+ goto done;
2064
+ }
2065
+ }
2066
+ else {
2067
+ goto done;
2068
+ }
2069
+ }
2070
+
2071
+ done:;
2072
+ }
2073
+
2074
+ evoasm_success
2075
+ evoasm_search_init(evoasm_search *search, evoasm_arch *arch, evoasm_search_params *search_params) {
2076
+ unsigned i, j, k;
2077
+ evoasm_domain cloned_domain;
2078
+ evoasm_arch_params_bitmap active_params = {0};
2079
+
2080
+ if(search_params->max_program_size > EVOASM_PROGRAM_MAX_SIZE) {
2081
+ evoasm_set_error(EVOASM_ERROR_TYPE_ARGUMENT, EVOASM_ERROR_CODE_NONE,
2082
+ NULL, "Program size cannot exceed %d", EVOASM_PROGRAM_MAX_SIZE);
2083
+ }
2084
+
2085
+ search->params = *search_params;
2086
+ search->arch = arch;
2087
+
2088
+ EVOASM_TRY(fail, evoasm_population_init, &search->pop, search);
2089
+
2090
+ for(i = 0; i < search_params->params_len; i++) {
2091
+ evoasm_bitmap_set((evoasm_bitmap *) &active_params, search_params->params[i]);
2092
+ }
2093
+
2094
+ search->domains = evoasm_calloc((size_t)(search->params.insts_len * search->params.params_len),
2095
+ sizeof(evoasm_domain));
2096
+
2097
+ for(i = 0; i < search->params.insts_len; i++) {
2098
+ evoasm_inst *inst = search->params.insts[i];
2099
+ for(j = 0; j < search->params.params_len; j++) {
2100
+ evoasm_domain *inst_domain = &search->domains[i * search->params.params_len + j];
2101
+ evoasm_arch_param_id param_id =search->params.params[j];
2102
+ for(k = 0; k < inst->params_len; k++) {
2103
+ evoasm_arch_param *param = &inst->params[k];
2104
+ if(param->id == param_id) {
2105
+ evoasm_domain *user_domain = search->params.domains[param_id];
2106
+ if(user_domain != NULL) {
2107
+ evoasm_domain_clone(user_domain, &cloned_domain);
2108
+ evoasm_domain_intersect(&cloned_domain, param->domain, inst_domain);
2109
+ } else {
2110
+ evoasm_domain_clone(param->domain, inst_domain);
2111
+ }
2112
+ goto found;
2113
+ }
2114
+ }
2115
+ /* not found */
2116
+ inst_domain->type = EVOASM_N_DOMAIN_TYPES;
2117
+ found:;
2118
+ }
2119
+ }
2120
+
2121
+ assert(search->params.min_program_size > 0);
2122
+ assert(search->params.min_program_size <= search->params.max_program_size);
2123
+
2124
+ return true;
2125
+ fail:
2126
+ return false;
2127
+ }
2128
+
2129
+ evoasm_success
2130
+ evoasm_search_destroy(evoasm_search *search) {
2131
+ unsigned i;
2132
+
2133
+ for(i = 0; i < EVOASM_ARCH_MAX_PARAMS; i++) {
2134
+ evoasm_free(search->params.domains[i]);
2135
+ }
2136
+ evoasm_free(search->params.program_input.vals);
2137
+ evoasm_free(search->params.program_output.vals);
2138
+ evoasm_free(search->params.params);
2139
+ evoasm_free(search->domains);
2140
+ EVOASM_TRY(error, evoasm_population_destroy, &search->pop);
2141
+
2142
+ return true;
2143
+ error:
2144
+ return false;
2145
+ }