evoasm 0.0.2.pre7 → 0.1.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gdbinit +41 -0
- data/.gitignore +1 -2
- data/.gitmodules +3 -0
- data/.rubocop.yml +8 -0
- data/Gemfile +4 -0
- data/LICENSE.md +660 -0
- data/Makefile +1 -1
- data/README.md +17 -9
- data/Rakefile +39 -107
- data/bin/gdb +1 -1
- data/bin/gdb_loop +4 -0
- data/docs/FindingInstructions.md +17 -0
- data/docs/JIT.md +14 -0
- data/docs/SymbolicRegression.md +102 -0
- data/docs/Visualization.md +29 -0
- data/docs/examples/bit_insts.rb +44 -0
- data/docs/examples/jit.rb +26 -0
- data/docs/examples/loss.gif +0 -0
- data/docs/examples/program.png +0 -0
- data/docs/examples/sym_reg.rb +64 -0
- data/docs/examples/vis.rb +38 -0
- data/evoasm.gemspec +21 -15
- data/ext/evoasm_ext/Rakefile +3 -0
- data/ext/evoasm_ext/compile.rake +35 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-alloc.c +226 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-alloc.h +84 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-arch.c +52 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-arch.h +101 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-bitmap.h +158 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-buf.c +204 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-buf.h +109 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-domain.c +124 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-domain.h +279 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-error.c +65 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-error.h +108 -0
- data/ext/evoasm_ext/{evoasm-log.c → libevoasm/src/evoasm-log.c} +36 -18
- data/ext/evoasm_ext/libevoasm/src/evoasm-log.h +93 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-param.c +22 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-param.h +33 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-pop-params.c +192 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-pop-params.h +60 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-pop.c +1323 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-pop.h +107 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-program-io.c +116 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-program-io.h +60 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-program.c +1827 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-program.h +167 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-rand.c +65 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-rand.h +76 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-signal.c +106 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-signal.h +58 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-util.h +112 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-x64.c +925 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm-x64.h +277 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm.c +28 -0
- data/ext/evoasm_ext/libevoasm/src/evoasm.h +35 -0
- data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-enums.h +2077 -0
- data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-insts.c +191203 -0
- data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-insts.h +1713 -0
- data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-misc.c +348 -0
- data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-misc.h +93 -0
- data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-params.c +51 -0
- data/ext/evoasm_ext/libevoasm/src/gen/evoasm-x64-params.h +509 -0
- data/lib/evoasm.rb +28 -11
- data/lib/evoasm/buffer.rb +105 -0
- data/lib/evoasm/capstone.rb +100 -0
- data/lib/evoasm/domain.rb +116 -0
- data/lib/evoasm/error.rb +37 -16
- data/lib/evoasm/exception_error.rb +19 -0
- data/lib/evoasm/ffi_ext.rb +53 -0
- data/lib/evoasm/libevoasm.rb +286 -0
- data/lib/evoasm/libevoasm/x64_enums.rb +1967 -0
- data/lib/evoasm/parameter.rb +20 -0
- data/lib/evoasm/population.rb +145 -0
- data/lib/evoasm/population/parameters.rb +227 -0
- data/lib/evoasm/population/plotter.rb +89 -0
- data/lib/evoasm/prng.rb +64 -0
- data/lib/evoasm/program.rb +195 -12
- data/lib/evoasm/program/io.rb +144 -0
- data/lib/evoasm/test.rb +8 -0
- data/lib/evoasm/version.rb +1 -1
- data/lib/evoasm/x64.rb +115 -0
- data/lib/evoasm/x64/cpu_state.rb +95 -0
- data/lib/evoasm/x64/instruction.rb +109 -0
- data/lib/evoasm/x64/operand.rb +156 -0
- data/lib/evoasm/x64/parameters.rb +211 -0
- data/test/helpers/population_helper.rb +128 -0
- data/test/helpers/test_helper.rb +1 -0
- data/test/helpers/x64_helper.rb +24 -0
- data/test/integration/bitwise_reverse_test.rb +41 -0
- data/test/integration/gcd_test.rb +52 -0
- data/test/integration/popcnt_test.rb +46 -0
- data/test/integration/sym_reg_test.rb +68 -0
- data/test/unit/evoasm/buffer_test.rb +48 -0
- data/test/unit/evoasm/capstone_test.rb +18 -0
- data/test/unit/evoasm/domain_test.rb +55 -0
- data/test/unit/evoasm/population/parameters_test.rb +106 -0
- data/test/unit/evoasm/population_test.rb +96 -0
- data/test/unit/evoasm/prng_test.rb +47 -0
- data/test/unit/evoasm/x64/cpu_state_test.rb +73 -0
- data/test/unit/evoasm/x64/encoding_test.rb +320 -0
- data/test/unit/evoasm/x64/instruction_access_test.rb +177 -0
- data/test/unit/evoasm/x64/instruction_encoding_test.rb +780 -0
- data/test/unit/evoasm/x64/instruction_test.rb +62 -0
- data/test/unit/evoasm/x64/parameters_test.rb +65 -0
- data/test/unit/evoasm/x64_test.rb +52 -0
- metadata +195 -89
- data/Gemfile.rake +0 -8
- data/Gemfile.rake.lock +0 -51
- data/LICENSE.txt +0 -373
- data/data/tables/README.md +0 -19
- data/data/tables/x64.csv +0 -1684
- data/data/templates/evoasm-x64.c.erb +0 -319
- data/data/templates/evoasm-x64.h.erb +0 -126
- data/examples/abs.yml +0 -20
- data/examples/popcnt.yml +0 -17
- data/examples/sym_reg.yml +0 -26
- data/exe/evoasm-search +0 -13
- data/ext/evoasm_ext/evoasm-alloc.c +0 -145
- data/ext/evoasm_ext/evoasm-alloc.h +0 -59
- data/ext/evoasm_ext/evoasm-arch.c +0 -44
- data/ext/evoasm_ext/evoasm-arch.h +0 -161
- data/ext/evoasm_ext/evoasm-bitmap.h +0 -114
- data/ext/evoasm_ext/evoasm-buf.c +0 -130
- data/ext/evoasm_ext/evoasm-buf.h +0 -47
- data/ext/evoasm_ext/evoasm-error.c +0 -31
- data/ext/evoasm_ext/evoasm-error.h +0 -75
- data/ext/evoasm_ext/evoasm-free-list.c.tmpl +0 -121
- data/ext/evoasm_ext/evoasm-free-list.h.tmpl +0 -86
- data/ext/evoasm_ext/evoasm-log.h +0 -69
- data/ext/evoasm_ext/evoasm-misc.c +0 -23
- data/ext/evoasm_ext/evoasm-misc.h +0 -282
- data/ext/evoasm_ext/evoasm-param.h +0 -37
- data/ext/evoasm_ext/evoasm-search.c +0 -2145
- data/ext/evoasm_ext/evoasm-search.h +0 -214
- data/ext/evoasm_ext/evoasm-util.h +0 -40
- data/ext/evoasm_ext/evoasm-x64.c +0 -275624
- data/ext/evoasm_ext/evoasm-x64.h +0 -5436
- data/ext/evoasm_ext/evoasm.c +0 -7
- data/ext/evoasm_ext/evoasm.h +0 -23
- data/ext/evoasm_ext/evoasm_ext.c +0 -1757
- data/ext/evoasm_ext/extconf.rb +0 -31
- data/lib/evoasm/cli.rb +0 -6
- data/lib/evoasm/cli/search.rb +0 -127
- data/lib/evoasm/core_ext.rb +0 -1
- data/lib/evoasm/core_ext/array.rb +0 -9
- data/lib/evoasm/core_ext/integer.rb +0 -10
- data/lib/evoasm/core_ext/kwstruct.rb +0 -13
- data/lib/evoasm/core_ext/range.rb +0 -5
- data/lib/evoasm/examples.rb +0 -27
- data/lib/evoasm/gen.rb +0 -8
- data/lib/evoasm/gen/enum.rb +0 -169
- data/lib/evoasm/gen/name_util.rb +0 -80
- data/lib/evoasm/gen/state.rb +0 -176
- data/lib/evoasm/gen/state_dsl.rb +0 -152
- data/lib/evoasm/gen/strio.rb +0 -27
- data/lib/evoasm/gen/translator.rb +0 -1102
- data/lib/evoasm/gen/version.rb +0 -5
- data/lib/evoasm/gen/x64.rb +0 -237
- data/lib/evoasm/gen/x64/funcs.rb +0 -495
- data/lib/evoasm/gen/x64/inst.rb +0 -781
- data/lib/evoasm/search.rb +0 -40
- data/lib/evoasm/tasks/gen_task.rb +0 -86
- data/lib/evoasm/tasks/template_task.rb +0 -52
- data/test/test_helper.rb +0 -1
- data/test/x64/test_helper.rb +0 -19
- data/test/x64/x64_test.rb +0 -87
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
|
|
3
|
+
*
|
|
4
|
+
* This program is free software: you can redistribute it and/or modify
|
|
5
|
+
* it under the terms of the GNU Affero General Public License as published by
|
|
6
|
+
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
+
* (at your option) any later version.
|
|
8
|
+
*
|
|
9
|
+
* This program is distributed in the hope that it will be useful,
|
|
10
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
* GNU Affero General Public License for more details.
|
|
13
|
+
*
|
|
14
|
+
* You should have received a copy of the GNU Affero General Public License
|
|
15
|
+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#pragma once
|
|
19
|
+
|
|
20
|
+
#include <stdalign.h>
|
|
21
|
+
#include "evoasm-error.h"
|
|
22
|
+
#include "evoasm-pop-params.h"
|
|
23
|
+
|
|
24
|
+
typedef struct {
|
|
25
|
+
evoasm_loss_t *samples;
|
|
26
|
+
uint8_t *counters;
|
|
27
|
+
} evoasm_pop_loss_data_t;
|
|
28
|
+
|
|
29
|
+
/* Program-level arrays: jump offsets and jump conditions. */
typedef struct {
  /* signed 16-bit jump offsets */
  int16_t *jmp_offs;
  /* jump condition codes, one byte each */
  uint8_t *jmp_cond;
} evoasm_pop_program_data_t;
|
|
33
|
+
|
|
34
|
+
typedef struct {
|
|
35
|
+
float *pheromones;
|
|
36
|
+
uint16_t *sizes;
|
|
37
|
+
evoasm_pop_program_data_t program_data;
|
|
38
|
+
} evoasm_pop_module_data_t;
|
|
39
|
+
|
|
40
|
+
typedef struct {
|
|
41
|
+
evoasm_inst_id_t *insts;
|
|
42
|
+
union {
|
|
43
|
+
evoasm_x64_basic_params_t *x64;
|
|
44
|
+
void *data;
|
|
45
|
+
} params;
|
|
46
|
+
} evoasm_pop_kernel_data_t;
|
|
47
|
+
|
|
48
|
+
struct evoasm_deme_s {
|
|
49
|
+
evoasm_prng_t prng;
|
|
50
|
+
uint16_t *blessed_indiv_idxs;
|
|
51
|
+
uint16_t *doomed_indiv_idxs;
|
|
52
|
+
evoasm_pop_program_data_t parent_program_data;
|
|
53
|
+
evoasm_pop_kernel_data_t parent_kernel_data;
|
|
54
|
+
evoasm_program_t program;
|
|
55
|
+
uint64_t *error_counters;
|
|
56
|
+
uint64_t error_counter;
|
|
57
|
+
evoasm_pop_loss_data_t loss_data;
|
|
58
|
+
evoasm_pop_program_data_t program_data;
|
|
59
|
+
evoasm_pop_kernel_data_t kernel_data;
|
|
60
|
+
evoasm_loss_t *top_losses;
|
|
61
|
+
|
|
62
|
+
evoasm_loss_t best_loss;
|
|
63
|
+
evoasm_pop_program_data_t best_program_data;
|
|
64
|
+
evoasm_pop_kernel_data_t best_kernel_data;
|
|
65
|
+
|
|
66
|
+
uint16_t n_doomed_indivs;
|
|
67
|
+
uint16_t n_blessed_indivs;
|
|
68
|
+
uint16_t n_examples;
|
|
69
|
+
evoasm_arch_id_t arch_id;
|
|
70
|
+
evoasm_pop_params_t *params;
|
|
71
|
+
evoasm_domain_t *domains;
|
|
72
|
+
} evoasm_aligned(EVOASM_CACHE_LINE_SIZE) ;
|
|
73
|
+
|
|
74
|
+
typedef struct evoasm_deme_s evoasm_deme_t;
|
|
75
|
+
|
|
76
|
+
typedef struct evoasm_pop_s {
|
|
77
|
+
evoasm_pop_params_t *params;
|
|
78
|
+
evoasm_domain_t *domains;
|
|
79
|
+
evoasm_deme_t *demes;
|
|
80
|
+
evoasm_pop_module_data_t module_data;
|
|
81
|
+
bool seeded : 1;
|
|
82
|
+
evoasm_loss_t *summary_losses;
|
|
83
|
+
|
|
84
|
+
} evoasm_pop_t;
|
|
85
|
+
|
|
86
|
+
evoasm_success_t
|
|
87
|
+
evoasm_pop_init(evoasm_pop_t *pop,
|
|
88
|
+
evoasm_arch_id_t arch_id,
|
|
89
|
+
evoasm_pop_params_t *params);
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
evoasm_success_t
|
|
93
|
+
evoasm_pop_eval(evoasm_pop_t *pop);
|
|
94
|
+
|
|
95
|
+
void
|
|
96
|
+
evoasm_pop_next_gen(evoasm_pop_t *pop);
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
evoasm_success_t
|
|
100
|
+
evoasm_pop_seed(evoasm_pop_t *pop);
|
|
101
|
+
|
|
102
|
+
void
|
|
103
|
+
evoasm_pop_destroy(evoasm_pop_t *pop);
|
|
104
|
+
|
|
105
|
+
//void
|
|
106
|
+
//evoasm_pop_inject(evoasm_pop_t *pop, evoasm_indiv_t *indiv, size_t indiv_size, evoasm_loss_t loss);
|
|
107
|
+
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
|
|
3
|
+
*
|
|
4
|
+
* This program is free software: you can redistribute it and/or modify
|
|
5
|
+
* it under the terms of the GNU Affero General Public License as published by
|
|
6
|
+
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
+
* (at your option) any later version.
|
|
8
|
+
*
|
|
9
|
+
* This program is distributed in the hope that it will be useful,
|
|
10
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
* GNU Affero General Public License for more details.
|
|
13
|
+
*
|
|
14
|
+
* You should have received a copy of the GNU Affero General Public License
|
|
15
|
+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#include "evoasm-alloc.h"
|
|
19
|
+
#include "evoasm-program-io.h"
|
|
20
|
+
#include <stdarg.h>
|
|
21
|
+
|
|
22
|
+
/*
 * Printable names of the example value types. The table is indexed with
 * evoasm_program_io_val_type_t values when building error messages, so the
 * entries must stay in the same order as that enum.
 */
static const char *const _evoasm_example_type_names[] = {
  /* 0 */ "i64",
  /* 1 */ "u64",
  /* 2 */ "f64"
};
|
|
27
|
+
|
|
28
|
+
evoasm_program_io_t *
|
|
29
|
+
evoasm_program_io_alloc(size_t len) {
|
|
30
|
+
evoasm_program_io_t *program_io = evoasm_malloc(sizeof(evoasm_program_io_t) + len * sizeof(evoasm_program_io_val_t));
|
|
31
|
+
program_io->len = (uint16_t) len;
|
|
32
|
+
|
|
33
|
+
return program_io;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
evoasm_success_t
|
|
37
|
+
evoasm_program_io_init(evoasm_program_io_t *program_io, size_t arity, ...) {
|
|
38
|
+
va_list args;
|
|
39
|
+
bool retval = true;
|
|
40
|
+
|
|
41
|
+
if(arity > EVOASM_PROGRAM_IO_MAX_ARITY) {
|
|
42
|
+
evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
|
|
43
|
+
"Maximum arity exceeded (%zu > %d)", arity, EVOASM_PROGRAM_IO_MAX_ARITY);
|
|
44
|
+
retval = false;
|
|
45
|
+
goto done;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
program_io->arity = (uint8_t) arity;
|
|
49
|
+
|
|
50
|
+
va_start(args, arity);
|
|
51
|
+
for(size_t i = 0; i < program_io->len; i++) {
|
|
52
|
+
size_t type_idx = i % arity;
|
|
53
|
+
evoasm_program_io_val_type_t type = va_arg(args, evoasm_program_io_val_type_t);
|
|
54
|
+
evoasm_program_io_val_t val;
|
|
55
|
+
switch(type) {
|
|
56
|
+
case EVOASM_PROGRAM_IO_VAL_TYPE_F64:
|
|
57
|
+
val.f64 = va_arg(args, double);
|
|
58
|
+
break;
|
|
59
|
+
case EVOASM_PROGRAM_IO_VAL_TYPE_I64:
|
|
60
|
+
val.i64 = va_arg(args, int64_t);
|
|
61
|
+
break;
|
|
62
|
+
case EVOASM_PROGRAM_IO_VAL_TYPE_U64:
|
|
63
|
+
val.u64 = va_arg(args, uint64_t);
|
|
64
|
+
break;
|
|
65
|
+
default:
|
|
66
|
+
evoasm_assert_not_reached();
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
program_io->vals[i] = val;
|
|
70
|
+
|
|
71
|
+
if(i >= arity) {
|
|
72
|
+
evoasm_program_io_val_type_t prev_type = program_io->types[type_idx];
|
|
73
|
+
|
|
74
|
+
if(prev_type != type) {
|
|
75
|
+
evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
|
|
76
|
+
"Example value type mismatch (previously %s, now %s)",
|
|
77
|
+
_evoasm_example_type_names[prev_type], _evoasm_example_type_names[type]);
|
|
78
|
+
retval = false;
|
|
79
|
+
goto done;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
program_io->types[type_idx] = type;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
done:
|
|
87
|
+
va_end(args);
|
|
88
|
+
return retval;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
double
|
|
92
|
+
evoasm_program_io_get_value_f64(evoasm_program_io_t *program_io, size_t idx) {
|
|
93
|
+
return program_io->vals[idx].f64;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
int64_t
|
|
97
|
+
evoasm_program_io_get_value_i64(evoasm_program_io_t *program_io, size_t idx) {
|
|
98
|
+
return program_io->vals[idx].i64;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
void
|
|
102
|
+
evoasm_program_io_destroy(evoasm_program_io_t *program_io) {
|
|
103
|
+
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
evoasm_program_io_val_type_t
|
|
107
|
+
evoasm_program_io_get_type(evoasm_program_io_t *program_io, size_t idx) {
|
|
108
|
+
return program_io->types[idx % program_io->arity];
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
EVOASM_DEF_FREE_FUNC(program_io)
|
|
112
|
+
|
|
113
|
+
EVOASM_DEF_GETTER(program_io, arity, size_t)
|
|
114
|
+
|
|
115
|
+
EVOASM_DEF_GETTER(program_io, len, size_t)
|
|
116
|
+
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
|
|
3
|
+
*
|
|
4
|
+
* This program is free software: you can redistribute it and/or modify
|
|
5
|
+
* it under the terms of the GNU Affero General Public License as published by
|
|
6
|
+
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
+
* (at your option) any later version.
|
|
8
|
+
*
|
|
9
|
+
* This program is distributed in the hope that it will be useful,
|
|
10
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
* GNU Affero General Public License for more details.
|
|
13
|
+
*
|
|
14
|
+
* You should have received a copy of the GNU Affero General Public License
|
|
15
|
+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#pragma once
|
|
19
|
+
|
|
20
|
+
#include <stdint.h>
|
|
21
|
+
|
|
22
|
+
#define EVOASM_PROGRAM_IO_MAX_ARITY 8
|
|
23
|
+
|
|
24
|
+
/* Type tag of an input/output value; selects the member of evoasm_program_io_val_t. */
typedef enum {
  EVOASM_PROGRAM_IO_VAL_TYPE_I64, /* signed 64-bit integer   (i64 member) */
  EVOASM_PROGRAM_IO_VAL_TYPE_U64, /* unsigned 64-bit integer (u64 member) */
  EVOASM_PROGRAM_IO_VAL_TYPE_F64, /* double                  (f64 member) */
} evoasm_program_io_val_type_t;
|
|
29
|
+
|
|
30
|
+
/* A single input/output value; the active member is given by the value's type tag. */
typedef union {
  double f64;   /* EVOASM_PROGRAM_IO_VAL_TYPE_F64 */
  int64_t i64;  /* EVOASM_PROGRAM_IO_VAL_TYPE_I64 */
  uint64_t u64; /* EVOASM_PROGRAM_IO_VAL_TYPE_U64 */
} evoasm_program_io_val_t;
|
|
35
|
+
|
|
36
|
+
typedef struct {
|
|
37
|
+
uint8_t arity;
|
|
38
|
+
uint16_t len;
|
|
39
|
+
evoasm_program_io_val_type_t types[EVOASM_PROGRAM_IO_MAX_ARITY];
|
|
40
|
+
evoasm_program_io_val_t vals[1];
|
|
41
|
+
} evoasm_program_io_t;
|
|
42
|
+
|
|
43
|
+
#define EVOASM_PROGRAM_OUTPUT_MAX_ARITY EVOASM_PROGRAM_IO_MAX_ARITY
|
|
44
|
+
#define EVOASM_PROGRAM_INPUT_MAX_ARITY EVOASM_PROGRAM_IO_MAX_ARITY
|
|
45
|
+
|
|
46
|
+
typedef evoasm_program_io_t evoasm_program_output_t;
|
|
47
|
+
typedef evoasm_program_io_t evoasm_program_input_t;
|
|
48
|
+
|
|
49
|
+
/*
 * Number of examples (rows) in a program I/O container: len / arity.
 * The arity must be non-zero. Arguments are fully parenthesized so that
 * arbitrary pointer expressions can be passed to the casting variants.
 */
#define EVOASM_PROGRAM_IO_N_EXAMPLES(program_io) ((size_t)((program_io)->len / (program_io)->arity))
#define EVOASM_PROGRAM_INPUT_N_TUPLES(program_input) EVOASM_PROGRAM_IO_N_EXAMPLES((evoasm_program_io_t *)(program_input))
#define EVOASM_PROGRAM_OUTPUT_N_TUPLES(program_output) EVOASM_PROGRAM_IO_N_EXAMPLES((evoasm_program_io_t *)(program_output))
|
|
52
|
+
|
|
53
|
+
evoasm_program_io_t *
|
|
54
|
+
evoasm_program_io_alloc(size_t len);
|
|
55
|
+
|
|
56
|
+
void
|
|
57
|
+
evoasm_program_io_destroy(evoasm_program_io_t *program_io);
|
|
58
|
+
|
|
59
|
+
/* Destroys a program output; outputs share the evoasm_program_io_t representation. */
#define evoasm_program_output_destroy(program_output) \
  evoasm_program_io_destroy((evoasm_program_io_t *)(program_output))
|
|
@@ -0,0 +1,1827 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (C) 2016 Julian Aron Prenner <jap@polyadic.com>
|
|
3
|
+
*
|
|
4
|
+
* This program is free software: you can redistribute it and/or modify
|
|
5
|
+
* it under the terms of the GNU Affero General Public License as published by
|
|
6
|
+
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
+
* (at your option) any later version.
|
|
8
|
+
*
|
|
9
|
+
* This program is distributed in the hope that it will be useful,
|
|
10
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
* GNU Affero General Public License for more details.
|
|
13
|
+
*
|
|
14
|
+
* You should have received a copy of the GNU Affero General Public License
|
|
15
|
+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#include "evoasm-signal.h"
|
|
19
|
+
#include "evoasm-program.h"
|
|
20
|
+
#include "evoasm-arch.h"
|
|
21
|
+
#include "evoasm.h"
|
|
22
|
+
#include "evoasm-x64.h"
|
|
23
|
+
#include "evoasm-param.h"
|
|
24
|
+
#include "evoasm-program-io.h"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
EVOASM_DEF_LOG_TAG("program")
|
|
28
|
+
|
|
29
|
+
static inline double
|
|
30
|
+
evoasm_program_io_val_to_dbl(evoasm_program_io_val_t io_val, evoasm_program_io_val_type_t io_val_type) {
|
|
31
|
+
switch(io_val_type) {
|
|
32
|
+
case EVOASM_PROGRAM_IO_VAL_TYPE_F64:
|
|
33
|
+
return io_val.f64;
|
|
34
|
+
case EVOASM_PROGRAM_IO_VAL_TYPE_I64:
|
|
35
|
+
return (double) io_val.i64;
|
|
36
|
+
default:
|
|
37
|
+
evoasm_log_fatal("unsupported input/output value type %d", io_val_type);
|
|
38
|
+
evoasm_assert_not_reached();
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
bool
|
|
43
|
+
evoasm_program_destroy(evoasm_program_t *program) {
|
|
44
|
+
|
|
45
|
+
bool retval = true;
|
|
46
|
+
|
|
47
|
+
if(!program->shallow) {
|
|
48
|
+
for(size_t i = 0; i < program->size; i++) {
|
|
49
|
+
evoasm_kernel_t *kernel = &program->kernels[i];
|
|
50
|
+
evoasm_free(kernel->insts);
|
|
51
|
+
switch(program->arch_info->id) {
|
|
52
|
+
case EVOASM_ARCH_X64:
|
|
53
|
+
evoasm_free(kernel->params.x64);
|
|
54
|
+
break;
|
|
55
|
+
default:
|
|
56
|
+
evoasm_assert_not_reached();
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
evoasm_free(program->jmp_offs);
|
|
63
|
+
evoasm_free(program->jmp_conds);
|
|
64
|
+
evoasm_free(program->kernels);
|
|
65
|
+
evoasm_free(program->recur_counters);
|
|
66
|
+
evoasm_free(program->output_vals);
|
|
67
|
+
|
|
68
|
+
if(program->buf) {
|
|
69
|
+
if(!evoasm_buf_destroy(program->buf)) {
|
|
70
|
+
retval = false;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
if(program->body_buf) {
|
|
75
|
+
if(!evoasm_buf_destroy(program->body_buf)) {
|
|
76
|
+
retval = false;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
return retval;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
#if 0
|
|
84
|
+
evoasm_success_t
|
|
85
|
+
evoasm_program_clone(evoasm_program_t *program, evoasm_program_t *cloned_program) {
|
|
86
|
+
size_t i = 0;
|
|
87
|
+
|
|
88
|
+
*cloned_program = *program;
|
|
89
|
+
cloned_program->reset_rflags = false;
|
|
90
|
+
cloned_program->_input.len = 0;
|
|
91
|
+
cloned_program->_output.len = 0;
|
|
92
|
+
cloned_program->output_vals = NULL;
|
|
93
|
+
cloned_program->buf = NULL;
|
|
94
|
+
cloned_program->body_buf = NULL;
|
|
95
|
+
|
|
96
|
+
/* memory addresses in original buffer point to memory in original program,
|
|
97
|
+
* we need to reemit assembly, this is done in a lazy fashion */
|
|
98
|
+
cloned_program->need_emit = true;
|
|
99
|
+
|
|
100
|
+
EVOASM_TRY(error, evoasm_buf_clone, program->buf, &cloned_program->_buf);
|
|
101
|
+
cloned_program->buf = &cloned_program->_buf;
|
|
102
|
+
EVOASM_TRY(error, evoasm_buf_clone, program->body_buf, &cloned_program->_body_buf);
|
|
103
|
+
cloned_program->body_buf = &cloned_program->_body_buf;
|
|
104
|
+
|
|
105
|
+
size_t program_params_size = sizeof(evoasm_program_params_t);
|
|
106
|
+
cloned_program->params = evoasm_malloc(program_params_size);
|
|
107
|
+
|
|
108
|
+
if(!cloned_program->params) {
|
|
109
|
+
goto error;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
memcpy(cloned_program->params, program->params, program_params_size);
|
|
113
|
+
|
|
114
|
+
for(; i < program->size; i++) {
|
|
115
|
+
evoasm_kernel_t *orig_kernel = &program->kernels[i];
|
|
116
|
+
evoasm_kernel_t *cloned_kernel = &cloned_program->kernels[i];
|
|
117
|
+
*cloned_kernel = *orig_kernel;
|
|
118
|
+
|
|
119
|
+
size_t params_size =
|
|
120
|
+
sizeof(evoasm_kernel_params_t) + orig_kernel->size * sizeof(evoasm_kernel_param_t);
|
|
121
|
+
cloned_kernel->params = evoasm_malloc(params_size);
|
|
122
|
+
if(!cloned_kernel->params) {
|
|
123
|
+
goto error;
|
|
124
|
+
}
|
|
125
|
+
memcpy(cloned_kernel->params, orig_kernel->params, params_size);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return true;
|
|
129
|
+
|
|
130
|
+
error:
|
|
131
|
+
(void) evoasm_program_destroy_(cloned_program, i);
|
|
132
|
+
return false;
|
|
133
|
+
}
|
|
134
|
+
#endif
|
|
135
|
+
|
|
136
|
+
evoasm_buf_t *
|
|
137
|
+
evoasm_program_get_buf(evoasm_program_t *program, bool body) {
|
|
138
|
+
if(body) {
|
|
139
|
+
return program->body_buf;
|
|
140
|
+
} else {
|
|
141
|
+
return program->buf;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
size_t
|
|
146
|
+
evoasm_program_get_size(evoasm_program_t *program) {
|
|
147
|
+
return program->size;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
size_t
|
|
151
|
+
evoasm_program_get_kernel_code(evoasm_program_t *program, size_t kernel_idx, const uint8_t **code) {
|
|
152
|
+
evoasm_kernel_t *kernel = &program->kernels[kernel_idx];
|
|
153
|
+
size_t len = (size_t) kernel->buf_end - kernel->buf_start;
|
|
154
|
+
*code = program->body_buf->data + kernel->buf_start;
|
|
155
|
+
return len;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
size_t
|
|
159
|
+
evoasm_program_get_code(evoasm_program_t *program, bool frame, const uint8_t **code) {
|
|
160
|
+
evoasm_buf_t *buf;
|
|
161
|
+
if(frame) {
|
|
162
|
+
buf = program->buf;
|
|
163
|
+
} else {
|
|
164
|
+
buf = program->body_buf;
|
|
165
|
+
}
|
|
166
|
+
*code = buf->data;
|
|
167
|
+
return buf->pos;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
int
|
|
172
|
+
evoasm_program_get_jmp_off(evoasm_program_t *program, size_t pos) {
|
|
173
|
+
return program->jmp_offs[pos];
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
bool
|
|
178
|
+
evoasm_program_is_input_reg(evoasm_program_t *program, size_t kernel_idx, evoasm_reg_id_t reg_id) {
|
|
179
|
+
evoasm_kernel_t *kernel = &program->kernels[kernel_idx];
|
|
180
|
+
switch(program->arch_info->id) {
|
|
181
|
+
case EVOASM_ARCH_X64:
|
|
182
|
+
return kernel->reg_info.x64.regs[reg_id].input;
|
|
183
|
+
default:
|
|
184
|
+
evoasm_assert_not_reached();
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
bool
|
|
189
|
+
evoasm_program_is_output_reg(evoasm_program_t *program, size_t kernel_idx, evoasm_reg_id_t reg_id) {
|
|
190
|
+
evoasm_kernel_t *kernel = &program->kernels[kernel_idx];
|
|
191
|
+
switch(program->arch_info->id) {
|
|
192
|
+
case EVOASM_ARCH_X64:
|
|
193
|
+
return kernel->reg_info.x64.regs[reg_id].output;
|
|
194
|
+
default:
|
|
195
|
+
evoasm_assert_not_reached();
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
#define EVOASM_PROGRAM_TMP_REG_X64 EVOASM_X64_REG_14
|
|
200
|
+
|
|
201
|
+
static evoasm_success_t
|
|
202
|
+
evoasm_program_x64_emit_rflags_reset(evoasm_program_t *program) {
|
|
203
|
+
evoasm_x64_params_t params = {0};
|
|
204
|
+
evoasm_buf_t *buf = program->buf;
|
|
205
|
+
|
|
206
|
+
evoasm_log_debug("emitting RFLAGS reset");
|
|
207
|
+
EVOASM_X64_ENC(pushfq);
|
|
208
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_X64_REG_SP);
|
|
209
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, 0);
|
|
210
|
+
EVOASM_X64_ENC(mov_rm64_imm32);
|
|
211
|
+
EVOASM_X64_ENC(popfq);
|
|
212
|
+
|
|
213
|
+
return true;
|
|
214
|
+
enc_failed:
|
|
215
|
+
return false;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
static evoasm_used evoasm_success_t
|
|
219
|
+
evoasm_program_x64_emit_mxcsr_reset(evoasm_program_t *program) {
|
|
220
|
+
static uint32_t default_mxcsr_val = 0x1f80;
|
|
221
|
+
evoasm_x64_params_t params = {0};
|
|
222
|
+
evoasm_buf_t *buf = program->buf;
|
|
223
|
+
|
|
224
|
+
evoasm_param_val_t addr_imm = (evoasm_param_val_t) (uintptr_t) &default_mxcsr_val;
|
|
225
|
+
evoasm_x64_reg_id_t reg_tmp0 = EVOASM_PROGRAM_TMP_REG_X64;
|
|
226
|
+
|
|
227
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, reg_tmp0);
|
|
228
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, addr_imm);
|
|
229
|
+
EVOASM_X64_ENC(mov_r32_imm32);
|
|
230
|
+
|
|
231
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, reg_tmp0);
|
|
232
|
+
EVOASM_X64_ENC(ldmxcsr_m32);
|
|
233
|
+
|
|
234
|
+
return true;
|
|
235
|
+
enc_failed:
|
|
236
|
+
return false;
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
static evoasm_success_t
|
|
241
|
+
evoasm_program_x64_emit_output_store(evoasm_program_t *program,
|
|
242
|
+
size_t tuple_idx) {
|
|
243
|
+
|
|
244
|
+
evoasm_x64_params_t params = {0};
|
|
245
|
+
evoasm_kernel_t *kernel = &program->kernels[program->size - 1];
|
|
246
|
+
evoasm_buf_t *buf = program->buf;
|
|
247
|
+
|
|
248
|
+
for(size_t i = 0; i < kernel->n_output_regs; i++) {
|
|
249
|
+
evoasm_x64_reg_id_t reg_id = kernel->output_regs.x64[i];
|
|
250
|
+
evoasm_program_io_val_t *val_addr = &program->output_vals[(tuple_idx * kernel->n_output_regs) + i];
|
|
251
|
+
evoasm_x64_reg_type_t reg_type = evoasm_x64_get_reg_type(reg_id);
|
|
252
|
+
|
|
253
|
+
evoasm_param_val_t addr_imm = (evoasm_param_val_t) (uintptr_t) val_addr;
|
|
254
|
+
|
|
255
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64);
|
|
256
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, addr_imm);
|
|
257
|
+
EVOASM_X64_ENC(mov_r64_imm64);
|
|
258
|
+
|
|
259
|
+
switch(reg_type) {
|
|
260
|
+
case EVOASM_X64_REG_TYPE_GP: {
|
|
261
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, reg_id);
|
|
262
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
|
|
263
|
+
EVOASM_X64_ENC(mov_rm64_r64);
|
|
264
|
+
break;
|
|
265
|
+
}
|
|
266
|
+
case EVOASM_X64_REG_TYPE_XMM: {
|
|
267
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, reg_id);
|
|
268
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
|
|
269
|
+
EVOASM_X64_ENC(movsd_xmmm64_xmm);
|
|
270
|
+
break;
|
|
271
|
+
}
|
|
272
|
+
default: {
|
|
273
|
+
evoasm_assert_not_reached();
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
return true;
|
|
279
|
+
|
|
280
|
+
enc_failed:
|
|
281
|
+
return false;
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
static evoasm_x64_reg_id_t
|
|
285
|
+
evoasm_kernel_get_operand_reg_id_x64(evoasm_kernel_t *kernel, evoasm_x64_operand_t *op, size_t inst_idx) {
|
|
286
|
+
evoasm_x64_inst_t *inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[inst_idx]);
|
|
287
|
+
|
|
288
|
+
if(op->param_idx < inst->n_params) {
|
|
289
|
+
return (evoasm_x64_reg_id_t) evoasm_x64_basic_params_get_(&kernel->params.x64[inst_idx],
|
|
290
|
+
(evoasm_x64_basic_param_id_t) inst->params[op->param_idx].id);
|
|
291
|
+
} else if(op->reg_id < EVOASM_X64_REG_NONE) {
|
|
292
|
+
return (evoasm_x64_reg_id_t) op->reg_id;
|
|
293
|
+
} else {
|
|
294
|
+
evoasm_assert_not_reached();
|
|
295
|
+
return EVOASM_X64_REG_NONE;
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
typedef struct {
|
|
300
|
+
evoasm_bitmap512_t mask;
|
|
301
|
+
} evoasm_x64_reg_liveness_t;
|
|
302
|
+
|
|
303
|
+
static void
|
|
304
|
+
evoasm_x64_reg_liveness_or_mask(evoasm_x64_inst_t *inst, evoasm_x64_operand_t *op, evoasm_x64_basic_params_t *params,
|
|
305
|
+
evoasm_bitmap512_t *mask512) {
|
|
306
|
+
evoasm_bitmap_t *mask = (evoasm_bitmap_t *) mask512;
|
|
307
|
+
switch(op->word) {
|
|
308
|
+
case EVOASM_X64_OPERAND_WORD_LB:
|
|
309
|
+
if(!op->implicit && op->param_idx < inst->n_params &&
|
|
310
|
+
(
|
|
311
|
+
(inst->params[op->param_idx].id == EVOASM_X64_BASIC_PARAM_REG0 && params->reg0_high_byte)
|
|
312
|
+
||
|
|
313
|
+
(inst->params[op->param_idx].id == EVOASM_X64_BASIC_PARAM_REG1 && params->reg1_high_byte)
|
|
314
|
+
)) {
|
|
315
|
+
goto hb;
|
|
316
|
+
}
|
|
317
|
+
evoasm_bitmap_or64(mask, 0, 0x00ffu);
|
|
318
|
+
break;
|
|
319
|
+
case EVOASM_X64_OPERAND_WORD_HB: {
|
|
320
|
+
hb:
|
|
321
|
+
evoasm_bitmap_or64(mask, 0, 0xff00u);
|
|
322
|
+
break;
|
|
323
|
+
}
|
|
324
|
+
case EVOASM_X64_OPERAND_WORD_W:
|
|
325
|
+
evoasm_bitmap_or64(mask, 0, 0xffffu);
|
|
326
|
+
break;
|
|
327
|
+
case EVOASM_X64_OPERAND_WORD_DW:
|
|
328
|
+
/* 32bit writes clear the whole register */
|
|
329
|
+
if(op->reg_type == EVOASM_X64_REG_TYPE_GP) {
|
|
330
|
+
evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
|
|
331
|
+
} else {
|
|
332
|
+
/* xmm[0..31] does this for example */
|
|
333
|
+
evoasm_bitmap_or64(mask, 0, 0xffffffffu);
|
|
334
|
+
}
|
|
335
|
+
break;
|
|
336
|
+
case EVOASM_X64_OPERAND_WORD_LQW:
|
|
337
|
+
evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
|
|
338
|
+
break;
|
|
339
|
+
case EVOASM_X64_OPERAND_WORD_HQW:
|
|
340
|
+
evoasm_bitmap_or64(mask, 1, 0xffffffffffffffffull);
|
|
341
|
+
break;
|
|
342
|
+
case EVOASM_X64_OPERAND_WORD_DQW:
|
|
343
|
+
evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
|
|
344
|
+
evoasm_bitmap_or64(mask, 1, 0xffffffffffffffffull);
|
|
345
|
+
break;
|
|
346
|
+
case EVOASM_X64_OPERAND_WORD_VW:
|
|
347
|
+
evoasm_bitmap_or64(mask, 0, 0xffffffffffffffffull);
|
|
348
|
+
evoasm_bitmap_or64(mask, 1, 0xffffffffffffffffull);
|
|
349
|
+
evoasm_bitmap_or64(mask, 2, 0xffffffffffffffffull);
|
|
350
|
+
evoasm_bitmap_or64(mask, 3, 0xffffffffffffffffull);
|
|
351
|
+
break;
|
|
352
|
+
default:
|
|
353
|
+
evoasm_assert_not_reached();
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
static void
|
|
358
|
+
evoasm_x64_reg_liveness_update(evoasm_x64_reg_liveness_t *reg_liveness, evoasm_x64_inst_t *inst,
|
|
359
|
+
evoasm_x64_operand_t *op, evoasm_x64_basic_params_t *params) {
|
|
360
|
+
evoasm_x64_reg_liveness_or_mask(inst, op, params, ®_liveness->mask);
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
static bool
|
|
365
|
+
evoasm_x64_reg_liveness_is_dirty_read_(evoasm_x64_reg_liveness_t *reg_liveness,
|
|
366
|
+
evoasm_bitmap512_t *mask) {
|
|
367
|
+
|
|
368
|
+
evoasm_bitmap512_andn(mask, ®_liveness->mask, mask);
|
|
369
|
+
return !evoasm_bitmap512_is_zero(mask);
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
static bool
|
|
373
|
+
evoasm_x64_reg_liveness_is_dirty_read(evoasm_x64_reg_liveness_t *reg_liveness, evoasm_x64_inst_t *inst,
|
|
374
|
+
evoasm_x64_operand_t *op,
|
|
375
|
+
evoasm_x64_basic_params_t *params) {
|
|
376
|
+
|
|
377
|
+
evoasm_bitmap512_t mask = {0};
|
|
378
|
+
evoasm_x64_reg_liveness_or_mask(inst, op, params, &mask);
|
|
379
|
+
|
|
380
|
+
return evoasm_x64_reg_liveness_is_dirty_read_(reg_liveness, &mask);
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
static bool
|
|
385
|
+
evoasm_kernel_is_writing_inst_x64(evoasm_kernel_t *kernel, size_t inst_idx, evoasm_reg_id_t reg_id,
|
|
386
|
+
evoasm_x64_reg_liveness_t *reg_liveness) {
|
|
387
|
+
evoasm_x64_inst_t *x64_inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[inst_idx]);
|
|
388
|
+
|
|
389
|
+
for(size_t i = 0; i < x64_inst->n_operands; i++) {
|
|
390
|
+
evoasm_x64_operand_t *op = &x64_inst->operands[i];
|
|
391
|
+
evoasm_x64_reg_id_t op_reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, inst_idx);
|
|
392
|
+
evoasm_x64_basic_params_t *x64_basic_params = &kernel->params.x64[inst_idx];
|
|
393
|
+
|
|
394
|
+
if(op->written && op_reg_id == reg_id &&
|
|
395
|
+
evoasm_x64_reg_liveness_is_dirty_read(reg_liveness, x64_inst, op, x64_basic_params)) {
|
|
396
|
+
evoasm_x64_reg_liveness_update(reg_liveness, x64_inst, op, x64_basic_params);
|
|
397
|
+
return true;
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
return false;
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
static void
|
|
405
|
+
evoasm_x64_reg_liveness_init(evoasm_x64_reg_liveness_t *reg_liveness) {
|
|
406
|
+
static evoasm_x64_reg_liveness_t zero_reg_liveness = {0};
|
|
407
|
+
*reg_liveness = zero_reg_liveness;
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
static evoasm_success_t
|
|
411
|
+
evoasm_program_x64_prepare_kernel(evoasm_program_t *program, evoasm_kernel_t *kernel) {
|
|
412
|
+
/* NOTE: output register are register that are written to
|
|
413
|
+
* _input registers are register that are read from without
|
|
414
|
+
* a previous write
|
|
415
|
+
*/
|
|
416
|
+
evoasm_x64_reg_liveness_t reg_livenesses[EVOASM_X64_REG_NONE];
|
|
417
|
+
for(int i = 0; i < EVOASM_X64_REG_NONE; i++) {
|
|
418
|
+
evoasm_x64_reg_liveness_init(®_livenesses[i]);
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
kernel->n_input_regs = 0;
|
|
422
|
+
kernel->n_output_regs = 0;
|
|
423
|
+
|
|
424
|
+
static evoasm_kernel_reg_info_t zero_reg_info = {0};
|
|
425
|
+
kernel->reg_info = zero_reg_info;
|
|
426
|
+
|
|
427
|
+
/* First, handle read ops, so that writing ops do not disturb us */
|
|
428
|
+
for(size_t i = 0; i < kernel->size; i++) {
|
|
429
|
+
evoasm_x64_inst_t *x64_inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[i]);
|
|
430
|
+
evoasm_x64_basic_params_t *x64_basic_params = &kernel->params.x64[i];
|
|
431
|
+
|
|
432
|
+
for(size_t j = 0; j < x64_inst->n_operands; j++) {
|
|
433
|
+
evoasm_x64_operand_t *op = &x64_inst->operands[j];
|
|
434
|
+
|
|
435
|
+
if((op->read || op->maybe_written) &&
|
|
436
|
+
(op->type == EVOASM_X64_OPERAND_TYPE_REG || op->type == EVOASM_X64_OPERAND_TYPE_RM)) {
|
|
437
|
+
|
|
438
|
+
if(op->reg_type == EVOASM_X64_REG_TYPE_RFLAGS) {
|
|
439
|
+
program->reset_rflags = true;
|
|
440
|
+
} else {
|
|
441
|
+
evoasm_x64_reg_id_t reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, (uint16_t) i);
|
|
442
|
+
evoasm_kernel_x64_reg_info_reg_t *reg_info = &kernel->reg_info.x64.regs[reg_id];
|
|
443
|
+
evoasm_x64_reg_liveness_t *reg_liveness = ®_livenesses[reg_id];
|
|
444
|
+
|
|
445
|
+
if(!reg_info->input) {
|
|
446
|
+
// has not been written before, might contain garbage
|
|
447
|
+
bool dirty_read;
|
|
448
|
+
|
|
449
|
+
/* the writer rank check is needed for the following case
|
|
450
|
+
* inst regX (operand1, written), regX (operand2, read)
|
|
451
|
+
*
|
|
452
|
+
* The first operand marks regX as written. The read in the second
|
|
453
|
+
* operand, however, is dirty, since the write has not yet occurred at this point.
|
|
454
|
+
*/
|
|
455
|
+
|
|
456
|
+
if(reg_info->written) {
|
|
457
|
+
dirty_read = evoasm_x64_reg_liveness_is_dirty_read(reg_liveness, x64_inst, op, x64_basic_params);
|
|
458
|
+
} else {
|
|
459
|
+
dirty_read = true;
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
if(dirty_read) {
|
|
463
|
+
reg_info->input = true;
|
|
464
|
+
kernel->n_input_regs++;
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
for(size_t j = 0; j < x64_inst->n_operands; j++) {
|
|
472
|
+
evoasm_x64_operand_t *op = &x64_inst->operands[j];
|
|
473
|
+
|
|
474
|
+
if(op->written && (op->type == EVOASM_X64_OPERAND_TYPE_REG || op->type == EVOASM_X64_OPERAND_TYPE_RM)) {
|
|
475
|
+
|
|
476
|
+
if(op->reg_type == EVOASM_X64_REG_TYPE_RFLAGS) {
|
|
477
|
+
kernel->reg_info.x64.written_flags =
|
|
478
|
+
(kernel->reg_info.x64.written_flags | op->written_flags) & EVOASM_X64_RFLAGS_FLAGS_BITSIZE;
|
|
479
|
+
kernel->reg_info.x64.regs[EVOASM_X64_REG_RFLAGS].written = true;
|
|
480
|
+
} else {
|
|
481
|
+
evoasm_x64_reg_id_t reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, (uint16_t) i);
|
|
482
|
+
evoasm_kernel_x64_reg_info_reg_t *reg_info = &kernel->reg_info.x64.regs[reg_id];
|
|
483
|
+
evoasm_x64_reg_liveness_t *reg_liveness = ®_livenesses[reg_id];
|
|
484
|
+
|
|
485
|
+
if(!reg_info->written) {
|
|
486
|
+
reg_info->written = true;
|
|
487
|
+
reg_info->output = true;
|
|
488
|
+
kernel->output_regs.x64[kernel->n_output_regs] = reg_id;
|
|
489
|
+
kernel->n_output_regs++;
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
evoasm_x64_reg_liveness_update(reg_liveness, x64_inst, op, x64_basic_params);
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
for(int i = 0; i < kernel->n_output_regs; i++) {
|
|
499
|
+
evoasm_x64_reg_id_t reg_id = kernel->output_regs.x64[i];
|
|
500
|
+
evoasm_kernel_x64_reg_info_reg_t *reg_info = &kernel->reg_info.x64.regs[reg_id];
|
|
501
|
+
if(!reg_info->input) {
|
|
502
|
+
evoasm_x64_reg_liveness_t *reg_liveness = ®_livenesses[reg_id];
|
|
503
|
+
|
|
504
|
+
evoasm_bitmap512_t mask = {0};
|
|
505
|
+
|
|
506
|
+
switch(evoasm_x64_get_reg_type(reg_id)) {
|
|
507
|
+
case EVOASM_X64_REG_TYPE_GP:
|
|
508
|
+
evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 0, 0xffffffffffffffffull);
|
|
509
|
+
break;
|
|
510
|
+
case EVOASM_X64_REG_TYPE_XMM:
|
|
511
|
+
case EVOASM_X64_REG_TYPE_ZMM:
|
|
512
|
+
evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 0, 0xffffffffffffffffull);
|
|
513
|
+
evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 1, 0xffffffffffffffffull);
|
|
514
|
+
evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 2, 0xffffffffffffffffull);
|
|
515
|
+
evoasm_bitmap_set64((evoasm_bitmap_t *) &mask, 3, 0xffffffffffffffffull);
|
|
516
|
+
break;
|
|
517
|
+
default:
|
|
518
|
+
evoasm_assert_not_reached();
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
bool dirty_read = evoasm_x64_reg_liveness_is_dirty_read_(reg_liveness, &mask);
|
|
522
|
+
if(dirty_read) {
|
|
523
|
+
reg_info->input = true;
|
|
524
|
+
kernel->n_input_regs++;
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
#ifdef EVOASM_ENABLE_PARANOID_MODE
|
|
530
|
+
{
|
|
531
|
+
size_t n_input_regs = 0;
|
|
532
|
+
for(evoasm_x64_reg_id_t i = (evoasm_x64_reg_id_t) 0; i < EVOASM_X64_REG_NONE; i++) {
|
|
533
|
+
if(kernel->reg_info.x64.regs[i].input) n_input_regs++;
|
|
534
|
+
}
|
|
535
|
+
assert(n_input_regs == kernel->n_input_regs);
|
|
536
|
+
}
|
|
537
|
+
#endif
|
|
538
|
+
|
|
539
|
+
assert(kernel->n_output_regs <= EVOASM_KERNEL_MAX_OUTPUT_REGS);
|
|
540
|
+
assert(kernel->n_input_regs <= EVOASM_KERNEL_MAX_INPUT_REGS);
|
|
541
|
+
|
|
542
|
+
if(kernel->n_output_regs == 0) {
|
|
543
|
+
evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_PROGRAM_ERROR_CODE_NO_OUTPUT, NULL);
|
|
544
|
+
return false;
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
return true;
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
static evoasm_success_t
|
|
551
|
+
evoasm_program_x64_prepare(evoasm_program_t *program) {
|
|
552
|
+
for(size_t i = 0; i < program->size; i++) {
|
|
553
|
+
evoasm_kernel_t *kernel = &program->kernels[i];
|
|
554
|
+
EVOASM_TRY(error, evoasm_program_x64_prepare_kernel, program, kernel);
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
return true;
|
|
558
|
+
|
|
559
|
+
error:
|
|
560
|
+
return false;
|
|
561
|
+
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
static evoasm_success_t
|
|
566
|
+
evoasm_program_x64_emit_input_reg_load(evoasm_x64_reg_id_t input_reg_id,
|
|
567
|
+
evoasm_buf_t *buf,
|
|
568
|
+
evoasm_program_io_val_t *tuple,
|
|
569
|
+
evoasm_program_io_val_t *loaded_tuple,
|
|
570
|
+
bool force_load) {
|
|
571
|
+
|
|
572
|
+
evoasm_x64_reg_type_t reg_type = evoasm_x64_get_reg_type(input_reg_id);
|
|
573
|
+
evoasm_x64_params_t params = {0};
|
|
574
|
+
|
|
575
|
+
evoasm_log_debug("emitting _input register initialization of register %d to value %"
|
|
576
|
+
PRId64, input_reg_id, tuple->i64);
|
|
577
|
+
|
|
578
|
+
switch(reg_type) {
|
|
579
|
+
case EVOASM_X64_REG_TYPE_GP: {
|
|
580
|
+
if(force_load) {
|
|
581
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64);
|
|
582
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) (uintptr_t) &tuple->i64);
|
|
583
|
+
EVOASM_X64_ENC(mov_r64_imm64);
|
|
584
|
+
|
|
585
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
|
|
586
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
|
|
587
|
+
EVOASM_X64_ENC(mov_r64_rm64);
|
|
588
|
+
} else {
|
|
589
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
|
|
590
|
+
/*FIXME: hard-coded tuple type */
|
|
591
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) tuple->i64);
|
|
592
|
+
EVOASM_X64_ENC(mov_r64_imm64);
|
|
593
|
+
}
|
|
594
|
+
break;
|
|
595
|
+
}
|
|
596
|
+
case EVOASM_X64_REG_TYPE_XMM: {
|
|
597
|
+
/* load address of tuple into tmp_reg */
|
|
598
|
+
if(loaded_tuple != tuple) {
|
|
599
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64);
|
|
600
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) (uintptr_t) &tuple->f64);
|
|
601
|
+
EVOASM_X64_ENC(mov_r64_imm64);
|
|
602
|
+
loaded_tuple = tuple;
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
/* load into xmm via address in tmp_reg */
|
|
606
|
+
/*FIXME: hard-coded tuple type */
|
|
607
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
|
|
608
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
|
|
609
|
+
EVOASM_X64_ENC(movsd_xmm_xmmm64);
|
|
610
|
+
break;
|
|
611
|
+
}
|
|
612
|
+
default:
|
|
613
|
+
evoasm_log_fatal("non-gpr register type (%d) (unimplemented)", reg_type);
|
|
614
|
+
evoasm_assert_not_reached();
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
return true;
|
|
618
|
+
|
|
619
|
+
enc_failed:
|
|
620
|
+
return false;
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
static evoasm_success_t
|
|
625
|
+
evoasm_program_x64_emit_input_load(evoasm_program_t *program,
|
|
626
|
+
evoasm_program_io_val_t *input_vals,
|
|
627
|
+
evoasm_program_io_val_type_t *types,
|
|
628
|
+
size_t in_arity,
|
|
629
|
+
bool set_io_mapping) {
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
evoasm_program_io_val_t *loaded_tuple = NULL;
|
|
633
|
+
evoasm_buf_t *buf = program->buf;
|
|
634
|
+
evoasm_kernel_t *kernel = &program->kernels[0];
|
|
635
|
+
|
|
636
|
+
evoasm_log_debug("n _input regs %d", kernel->n_input_regs);
|
|
637
|
+
#if 0
|
|
638
|
+
for(input_reg_id = (evoasm_x64_reg_id_t) 13; input_reg_id < 19; input_reg_id++) {
|
|
639
|
+
if(input_reg_id == EVOASM_X64_REG_SP) continue;
|
|
640
|
+
evoasm_x64_params_t params = {0};
|
|
641
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
|
|
642
|
+
/*FIXME: hard-coded tuple type */
|
|
643
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, 0);
|
|
644
|
+
EVOASM_X64_ENC(mov_r64_imm64);
|
|
645
|
+
}
|
|
646
|
+
#endif
|
|
647
|
+
|
|
648
|
+
{
|
|
649
|
+
size_t input_reg_idx = 0;
|
|
650
|
+
for(evoasm_x64_reg_id_t input_reg = (evoasm_x64_reg_id_t) 0; input_reg < EVOASM_X64_REG_NONE; input_reg++) {
|
|
651
|
+
if(!kernel->reg_info.x64.regs[input_reg].input) continue;
|
|
652
|
+
|
|
653
|
+
size_t tuple_idx;
|
|
654
|
+
|
|
655
|
+
if(set_io_mapping) {
|
|
656
|
+
tuple_idx = input_reg_idx++ % in_arity;
|
|
657
|
+
program->reg_inputs.x64[input_reg] = (uint8_t) tuple_idx;
|
|
658
|
+
} else {
|
|
659
|
+
tuple_idx = program->reg_inputs.x64[input_reg];
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
evoasm_program_io_val_t *tuple = &input_vals[tuple_idx];
|
|
663
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_input_reg_load, input_reg, buf, tuple, loaded_tuple, false);
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
#ifdef EVOASM_ENABLE_PARANOID_MODE
|
|
668
|
+
EVOASM_TRY(error, evoasm_x64_emit_push, EVOASM_PROGRAM_TMP_REG_X64, buf);
|
|
669
|
+
for(evoasm_x64_reg_id_t non_input_reg = (evoasm_x64_reg_id_t) EVOASM_X64_REG_A;
|
|
670
|
+
non_input_reg < EVOASM_X64_REG_15; non_input_reg++) {
|
|
671
|
+
if(kernel->reg_info.x64.regs[non_input_reg].input) continue;
|
|
672
|
+
if(non_input_reg == EVOASM_X64_REG_SP) continue;
|
|
673
|
+
|
|
674
|
+
evoasm_program_io_val_t *tuple = &kernel->rand_vals[non_input_reg];
|
|
675
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_input_reg_load, non_input_reg, buf, tuple, NULL, true);
|
|
676
|
+
}
|
|
677
|
+
EVOASM_TRY(error, evoasm_x64_emit_pop, EVOASM_PROGRAM_TMP_REG_X64, buf);
|
|
678
|
+
#endif
|
|
679
|
+
|
|
680
|
+
if(program->reset_rflags) {
|
|
681
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_rflags_reset, program);
|
|
682
|
+
}
|
|
683
|
+
return true;
|
|
684
|
+
|
|
685
|
+
error:
|
|
686
|
+
return false;
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
static evoasm_success_t
|
|
690
|
+
evoasm_program_x64_emit_kernel_transition(evoasm_program_t *program,
|
|
691
|
+
evoasm_kernel_t *from_kernel,
|
|
692
|
+
evoasm_kernel_t *to_kernel,
|
|
693
|
+
evoasm_buf_t *buf,
|
|
694
|
+
size_t trans_idx,
|
|
695
|
+
bool set_io_mapping) {
|
|
696
|
+
size_t input_reg_idx;
|
|
697
|
+
evoasm_x64_reg_id_t input_reg_id;
|
|
698
|
+
|
|
699
|
+
assert(from_kernel->n_output_regs > 0);
|
|
700
|
+
|
|
701
|
+
for(input_reg_id = (evoasm_x64_reg_id_t) 0, input_reg_idx = 0; input_reg_id < EVOASM_X64_REG_NONE; input_reg_id++) {
|
|
702
|
+
if(!to_kernel->reg_info.x64.regs[input_reg_id].input) continue;
|
|
703
|
+
|
|
704
|
+
evoasm_x64_reg_id_t output_reg_id;
|
|
705
|
+
|
|
706
|
+
if(set_io_mapping) {
|
|
707
|
+
size_t output_reg_idx = input_reg_idx % from_kernel->n_output_regs;
|
|
708
|
+
output_reg_id = from_kernel->output_regs.x64[output_reg_idx];
|
|
709
|
+
|
|
710
|
+
from_kernel->reg_info.x64.trans_regs[trans_idx][input_reg_id] = output_reg_id;
|
|
711
|
+
} else {
|
|
712
|
+
output_reg_id = from_kernel->reg_info.x64.trans_regs[trans_idx][input_reg_id];
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
evoasm_x64_reg_type_t output_reg_type = evoasm_x64_get_reg_type(output_reg_id);
|
|
716
|
+
evoasm_x64_reg_type_t input_reg_type = evoasm_x64_get_reg_type(input_reg_id);
|
|
717
|
+
evoasm_x64_params_t params = {0};
|
|
718
|
+
|
|
719
|
+
if(input_reg_id != output_reg_id) {
|
|
720
|
+
if(output_reg_type == EVOASM_X64_REG_TYPE_GP &&
|
|
721
|
+
input_reg_type == EVOASM_X64_REG_TYPE_GP) {
|
|
722
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
|
|
723
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
|
|
724
|
+
EVOASM_X64_ENC(mov_r64_rm64);
|
|
725
|
+
} else if(output_reg_type == EVOASM_X64_REG_TYPE_XMM &&
|
|
726
|
+
input_reg_type == EVOASM_X64_REG_TYPE_XMM) {
|
|
727
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
|
|
728
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
|
|
729
|
+
if(program->arch_info->features & EVOASM_X64_FEATURE_AVX) {
|
|
730
|
+
EVOASM_X64_ENC(vmovdqa_ymm_ymmm256);
|
|
731
|
+
} else {
|
|
732
|
+
EVOASM_X64_ENC(movdqa_xmm_xmmm128);
|
|
733
|
+
}
|
|
734
|
+
} else if(output_reg_type == EVOASM_X64_REG_TYPE_GP &&
|
|
735
|
+
input_reg_type == EVOASM_X64_REG_TYPE_XMM) {
|
|
736
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
|
|
737
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
|
|
738
|
+
if(program->arch_info->features & EVOASM_X64_FEATURE_AVX) {
|
|
739
|
+
EVOASM_X64_ENC(vmovq_xmm_rm64);
|
|
740
|
+
} else {
|
|
741
|
+
EVOASM_X64_ENC(movq_xmm_rm64);
|
|
742
|
+
}
|
|
743
|
+
} else if(output_reg_type == EVOASM_X64_REG_TYPE_XMM &&
|
|
744
|
+
input_reg_type == EVOASM_X64_REG_TYPE_GP) {
|
|
745
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, input_reg_id);
|
|
746
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG1, output_reg_id);
|
|
747
|
+
if(program->arch_info->features & EVOASM_X64_FEATURE_AVX) {
|
|
748
|
+
EVOASM_X64_ENC(vmovq_rm64_xmm);
|
|
749
|
+
} else {
|
|
750
|
+
EVOASM_X64_ENC(movq_rm64_xmm);
|
|
751
|
+
}
|
|
752
|
+
} else {
|
|
753
|
+
evoasm_assert_not_reached();
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
input_reg_idx++;
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
return true;
|
|
760
|
+
|
|
761
|
+
enc_failed:
|
|
762
|
+
return false;
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
/*
 * Helpers for patching rel32 jump targets ("phis") after emission.
 *
 * EVOASM_BUF_PHI_GET(buf)       pointer to the last four bytes written to
 *                               `buf`, viewed as a 32-bit slot.
 * EVOASM_BUF_PHI_SET(label,val) stores into the slot `label` the distance
 *                               from the end of the slot (label + 4 bytes)
 *                               to the address `val`.
 * EVOASM_BUF_POS_ADDR(buf)      address of the current write position.
 *
 * EVOASM_BUF_PHI_SET expands to a single statement without a trailing
 * semicolon, so it can be used safely in brace-less if/else; all macro
 * arguments are parenthesized.
 */
#define EVOASM_BUF_PHI_GET(buf) ((uint32_t *)((buf)->data + (buf)->pos - 4))
#define EVOASM_BUF_PHI_SET(label, val) \
  do { (*(label) = (uint32_t)((uint8_t *)(val) - ((uint8_t *)(label) + 4))); } while(0)
#define EVOASM_BUF_POS_ADDR(buf) ((buf)->data + (buf)->pos)

/* Number of conditional jump instructions a jump condition is reduced to. */
#define EVOASM_PROGRAM_X64_N_JMP_INSTS 16
|
|
771
|
+
|
|
772
|
+
static evoasm_success_t
|
|
773
|
+
evoasm_program_x64_emit_kernel_transitions(evoasm_program_t *program,
|
|
774
|
+
evoasm_kernel_t *kernel,
|
|
775
|
+
evoasm_kernel_t *next_kernel,
|
|
776
|
+
evoasm_kernel_t *branch_kernel,
|
|
777
|
+
evoasm_buf_t *buf,
|
|
778
|
+
uint32_t **branch_kernel_phi,
|
|
779
|
+
bool set_io_mapping) {
|
|
780
|
+
|
|
781
|
+
static const evoasm_x64_inst_id_t jmp_insts[] = {
|
|
782
|
+
EVOASM_X64_INST_JA_REL32, // 0
|
|
783
|
+
EVOASM_X64_INST_JAE_REL32, // 1
|
|
784
|
+
EVOASM_X64_INST_JB_REL32, // 2
|
|
785
|
+
EVOASM_X64_INST_JBE_REL32, // 3
|
|
786
|
+
EVOASM_X64_INST_JE_REL32, // 4
|
|
787
|
+
EVOASM_X64_INST_JG_REL32, // 5
|
|
788
|
+
EVOASM_X64_INST_JGE_REL32, // 6
|
|
789
|
+
EVOASM_X64_INST_JL_REL32, // 7
|
|
790
|
+
EVOASM_X64_INST_JLE_REL32, // 8
|
|
791
|
+
EVOASM_X64_INST_JNE_REL32, // 9
|
|
792
|
+
EVOASM_X64_INST_JNO_REL32, // 10
|
|
793
|
+
EVOASM_X64_INST_JNP_REL32, // 11
|
|
794
|
+
EVOASM_X64_INST_JNS_REL32, // 12
|
|
795
|
+
EVOASM_X64_INST_JO_REL32, // 13
|
|
796
|
+
EVOASM_X64_INST_JP_REL32, // 14
|
|
797
|
+
EVOASM_X64_INST_JS_REL32, // 15
|
|
798
|
+
};
|
|
799
|
+
|
|
800
|
+
evoasm_x64_params_t params = {0};
|
|
801
|
+
uint32_t *branch_phi = NULL;
|
|
802
|
+
uint32_t *counter_phi = NULL;
|
|
803
|
+
|
|
804
|
+
if(program->recur_limit == 0) goto next_transition;
|
|
805
|
+
|
|
806
|
+
evoasm_inst_id_t jmp_inst_id = jmp_insts[program->jmp_conds[kernel->idx] % EVOASM_PROGRAM_X64_N_JMP_INSTS];
|
|
807
|
+
|
|
808
|
+
if(kernel->reg_info.x64.regs[EVOASM_X64_REG_RFLAGS].written) {
|
|
809
|
+
if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_OF)) {
|
|
810
|
+
if(jmp_inst_id == EVOASM_X64_INST_JO_REL32 || jmp_inst_id == EVOASM_X64_INST_JNO_REL32) goto branch_transition;
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_SF)) {
|
|
814
|
+
if(jmp_inst_id == EVOASM_X64_INST_JS_REL32 || jmp_inst_id == EVOASM_X64_INST_JNS_REL32) goto branch_transition;
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_ZF)) {
|
|
818
|
+
if(jmp_inst_id == EVOASM_X64_INST_JE_REL32 ||
|
|
819
|
+
jmp_inst_id == EVOASM_X64_INST_JNE_REL32 ||
|
|
820
|
+
jmp_inst_id == EVOASM_X64_INST_JBE_REL32 ||
|
|
821
|
+
jmp_inst_id == EVOASM_X64_INST_JLE_REL32) {
|
|
822
|
+
goto branch_transition;
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_CF)) {
|
|
827
|
+
if(jmp_inst_id == EVOASM_X64_INST_JB_REL32 ||
|
|
828
|
+
jmp_inst_id == EVOASM_X64_INST_JAE_REL32 ||
|
|
829
|
+
jmp_inst_id == EVOASM_X64_INST_JBE_REL32) {
|
|
830
|
+
goto branch_transition;
|
|
831
|
+
}
|
|
832
|
+
}
|
|
833
|
+
|
|
834
|
+
if((EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_ZF)) &&
|
|
835
|
+
(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_CF))) {
|
|
836
|
+
if(jmp_inst_id == EVOASM_X64_INST_JA_REL32) goto branch_transition;
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
if((EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_SF)) &&
|
|
840
|
+
(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_OF))) {
|
|
841
|
+
|
|
842
|
+
if(jmp_inst_id == EVOASM_X64_INST_JL_REL32 ||
|
|
843
|
+
jmp_inst_id == EVOASM_X64_INST_JGE_REL32 ||
|
|
844
|
+
jmp_inst_id == EVOASM_X64_INST_JLE_REL32 ||
|
|
845
|
+
((EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_ZF)) &&
|
|
846
|
+
jmp_inst_id == EVOASM_X64_INST_JG_REL32)) {
|
|
847
|
+
goto branch_transition;
|
|
848
|
+
}
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
if(EVOASM_X64_RFLAGS_FLAGS_GET(kernel->reg_info.x64.written_flags, EVOASM_X64_RFLAGS_FLAG_PF)) {
|
|
852
|
+
if(jmp_inst_id == EVOASM_X64_INST_JP_REL32 || jmp_inst_id == EVOASM_X64_INST_JNP_REL32) goto branch_transition;
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
/* kernel does not write to required jump flag, ignore jmp_off and emit next kernel */
|
|
856
|
+
goto next_transition;
|
|
857
|
+
|
|
858
|
+
#if 0
|
|
859
|
+
/*FIXME: only 8bit possible, check and activate if feasable*/
|
|
860
|
+
if(kernel->reg_info.x64.regs[EVOASM_X64_REG_C].written) {
|
|
861
|
+
jmp_insts[possible_jmp_insts_len++] = EVOASM_X64_INST_JECXZ_JRCXZ_REL8;
|
|
862
|
+
}
|
|
863
|
+
#endif
|
|
864
|
+
|
|
865
|
+
branch_transition:
|
|
866
|
+
{
|
|
867
|
+
evoasm_buf_ref_t buf_ref = {
|
|
868
|
+
.data = buf->data,
|
|
869
|
+
.pos = &buf->pos
|
|
870
|
+
};
|
|
871
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
|
|
872
|
+
EVOASM_TRY(error, evoasm_x64_enc_, (evoasm_x64_inst_id_t) jmp_inst_id, ¶ms, &buf_ref);
|
|
873
|
+
branch_phi = EVOASM_BUF_PHI_GET(buf);
|
|
874
|
+
assert(*branch_phi == 0xdeadbeef);
|
|
875
|
+
|
|
876
|
+
if(branch_kernel->idx <= kernel->idx) {
|
|
877
|
+
/* back jump, guard with counter */
|
|
878
|
+
|
|
879
|
+
uint32_t *counter = &program->recur_counters[kernel->idx];
|
|
880
|
+
uintptr_t addr_imm = (uintptr_t) counter;
|
|
881
|
+
|
|
882
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG0, EVOASM_PROGRAM_TMP_REG_X64);
|
|
883
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, (evoasm_param_val_t) addr_imm);
|
|
884
|
+
EVOASM_X64_ENC(mov_r64_imm64);
|
|
885
|
+
|
|
886
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REG_BASE, EVOASM_PROGRAM_TMP_REG_X64);
|
|
887
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_IMM0, program->recur_limit);
|
|
888
|
+
EVOASM_X64_ENC(cmp_rm32_imm32);
|
|
889
|
+
|
|
890
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
|
|
891
|
+
EVOASM_X64_ENC(jge_rel32);
|
|
892
|
+
|
|
893
|
+
counter_phi = EVOASM_BUF_PHI_GET(buf);
|
|
894
|
+
assert(*counter_phi == 0xdeadbeef);
|
|
895
|
+
|
|
896
|
+
EVOASM_X64_ENC(inc_rm32);
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transition, program,
|
|
900
|
+
kernel, branch_kernel, buf, 1, set_io_mapping);
|
|
901
|
+
|
|
902
|
+
EVOASM_X64_SET(EVOASM_X64_PARAM_REL, 0xdeadbeef);
|
|
903
|
+
EVOASM_X64_ENC(jmp_rel32);
|
|
904
|
+
|
|
905
|
+
*branch_kernel_phi = EVOASM_BUF_PHI_GET(buf);
|
|
906
|
+
assert(**branch_kernel_phi == 0xdeadbeef);
|
|
907
|
+
|
|
908
|
+
if(branch_phi != NULL) {
|
|
909
|
+
EVOASM_BUF_PHI_SET(branch_phi, EVOASM_BUF_POS_ADDR(buf));
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
if(counter_phi != NULL) {
|
|
913
|
+
EVOASM_BUF_PHI_SET(counter_phi, EVOASM_BUF_POS_ADDR(buf));
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
next_transition:
|
|
918
|
+
if(next_kernel != NULL) {
|
|
919
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transition, program,
|
|
920
|
+
kernel, next_kernel, buf, 0, set_io_mapping);
|
|
921
|
+
}
|
|
922
|
+
|
|
923
|
+
evoasm_buf_log(buf, EVOASM_LOG_LEVEL_DEBUG);
|
|
924
|
+
|
|
925
|
+
return true;
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
error:
|
|
929
|
+
enc_failed:
|
|
930
|
+
return false;
|
|
931
|
+
}
|
|
932
|
+
|
|
933
|
+
|
|
934
|
+
static evoasm_success_t
|
|
935
|
+
evoasm_program_x64_emit_kernel(evoasm_program_t *program, evoasm_kernel_t *kernel, evoasm_buf_t *buf) {
|
|
936
|
+
evoasm_buf_ref_t buf_ref = {
|
|
937
|
+
.data = buf->data,
|
|
938
|
+
.pos = &buf->pos
|
|
939
|
+
};
|
|
940
|
+
|
|
941
|
+
assert(kernel->size > 0);
|
|
942
|
+
for(size_t i = 0; i < kernel->size; i++) {
|
|
943
|
+
evoasm_x64_inst_t *inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[i]);
|
|
944
|
+
program->exception_mask = program->exception_mask | inst->exceptions;
|
|
945
|
+
EVOASM_TRY(error, evoasm_x64_inst_enc_basic_, inst, &kernel->params.x64[i], &buf_ref);
|
|
946
|
+
}
|
|
947
|
+
return true;
|
|
948
|
+
error:
|
|
949
|
+
return false;
|
|
950
|
+
}
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
static size_t
|
|
954
|
+
evoasm_program_branch_kernel_idx(evoasm_program_t *program, size_t idx) {
|
|
955
|
+
return (size_t) EVOASM_CLAMP((int) idx + program->jmp_offs[idx], 0, program->size - 1);
|
|
956
|
+
}
|
|
957
|
+
|
|
958
|
+
static evoasm_success_t
|
|
959
|
+
evoasm_program_x64_emit_program_kernels(evoasm_program_t *program, bool set_io_mapping) {
|
|
960
|
+
evoasm_buf_t *buf = program->body_buf;
|
|
961
|
+
evoasm_kernel_t *kernel, *next_kernel, *branch_kernel;
|
|
962
|
+
size_t program_size = program->size;
|
|
963
|
+
uint32_t *branch_phis[EVOASM_PROGRAM_MAX_SIZE] = {0};
|
|
964
|
+
uint8_t *kernel_addrs[EVOASM_PROGRAM_MAX_SIZE];
|
|
965
|
+
|
|
966
|
+
evoasm_buf_reset(buf);
|
|
967
|
+
|
|
968
|
+
assert(program_size > 0);
|
|
969
|
+
|
|
970
|
+
for(size_t i = 0; i < program_size; i++) {
|
|
971
|
+
kernel = &program->kernels[i];
|
|
972
|
+
|
|
973
|
+
kernel_addrs[i] = buf->data + buf->pos;
|
|
974
|
+
kernel->buf_start = (uint16_t) buf->pos;
|
|
975
|
+
|
|
976
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_kernel, program, kernel, buf);
|
|
977
|
+
|
|
978
|
+
if(i < program_size - 1) {
|
|
979
|
+
next_kernel = &program->kernels[i + 1];
|
|
980
|
+
} else {
|
|
981
|
+
next_kernel = NULL;
|
|
982
|
+
}
|
|
983
|
+
|
|
984
|
+
size_t branch_kernel_idx = evoasm_program_branch_kernel_idx(program, i);
|
|
985
|
+
assert(branch_kernel_idx < program->size);
|
|
986
|
+
branch_kernel = &program->kernels[branch_kernel_idx];
|
|
987
|
+
|
|
988
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_kernel_transitions, program, kernel,
|
|
989
|
+
next_kernel, branch_kernel, buf, &branch_phis[i], set_io_mapping);
|
|
990
|
+
|
|
991
|
+
kernel->buf_end = (uint16_t) buf->pos;
|
|
992
|
+
}
|
|
993
|
+
|
|
994
|
+
for(size_t i = 0; i < program_size; i++) {
|
|
995
|
+
size_t branch_kernel_idx = evoasm_program_branch_kernel_idx(program, i);
|
|
996
|
+
uint32_t *branch_phi = branch_phis[i];
|
|
997
|
+
if(branch_phi != NULL) {
|
|
998
|
+
uint8_t *branch_kernel_addr = kernel_addrs[branch_kernel_idx];
|
|
999
|
+
assert(*branch_phi == 0xdeadbeef);
|
|
1000
|
+
EVOASM_BUF_PHI_SET(branch_phi, branch_kernel_addr);
|
|
1001
|
+
}
|
|
1002
|
+
}
|
|
1003
|
+
|
|
1004
|
+
return true;
|
|
1005
|
+
error:
|
|
1006
|
+
return false;
|
|
1007
|
+
}
|
|
1008
|
+
|
|
1009
|
+
static evoasm_success_t
|
|
1010
|
+
evoasm_program_x64_emit_io_load_store(evoasm_program_t *program,
|
|
1011
|
+
evoasm_program_input_t *input,
|
|
1012
|
+
bool io_mapping) {
|
|
1013
|
+
size_t n_tuples = EVOASM_PROGRAM_INPUT_N_TUPLES(input);
|
|
1014
|
+
|
|
1015
|
+
evoasm_buf_reset(program->buf);
|
|
1016
|
+
EVOASM_TRY(error, evoasm_x64_emit_func_prolog, EVOASM_X64_ABI_SYSV, program->buf);
|
|
1017
|
+
|
|
1018
|
+
for(size_t i = 0; i < n_tuples; i++) {
|
|
1019
|
+
evoasm_program_io_val_t *input_vals = input->vals + i * input->arity;
|
|
1020
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_input_load, program, input_vals, input->types, input->arity,
|
|
1021
|
+
io_mapping);
|
|
1022
|
+
size_t r = evoasm_buf_append(program->buf, program->body_buf);
|
|
1023
|
+
assert(r == 0);
|
|
1024
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_output_store, program, i);
|
|
1025
|
+
}
|
|
1026
|
+
|
|
1027
|
+
EVOASM_TRY(error, evoasm_x64_emit_func_epilog, EVOASM_X64_ABI_SYSV, program->buf);
|
|
1028
|
+
return true;
|
|
1029
|
+
|
|
1030
|
+
error:
|
|
1031
|
+
return false;
|
|
1032
|
+
}
|
|
1033
|
+
|
|
1034
|
+
static evoasm_success_t
|
|
1035
|
+
evoasm_program_x64_emit(evoasm_program_t *program,
|
|
1036
|
+
evoasm_program_input_t *input,
|
|
1037
|
+
evoasm_program_emit_flags_t emit_flags) {
|
|
1038
|
+
|
|
1039
|
+
bool set_io_mapping = emit_flags & EVOASM_PROGRAM_EMIT_FLAG_SET_IO_MAPPING;
|
|
1040
|
+
|
|
1041
|
+
if(emit_flags & EVOASM_PROGRAM_EMIT_FLAG_PREPARE) {
|
|
1042
|
+
EVOASM_TRY(error, evoasm_program_x64_prepare, program);
|
|
1043
|
+
}
|
|
1044
|
+
|
|
1045
|
+
if(emit_flags & EVOASM_PROGRAM_EMIT_FLAG_EMIT_KERNELS) {
|
|
1046
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_program_kernels, program, set_io_mapping);
|
|
1047
|
+
}
|
|
1048
|
+
|
|
1049
|
+
if(emit_flags & EVOASM_PROGRAM_EMIT_FLAG_EMIT_IO_LOAD_STORE) {
|
|
1050
|
+
EVOASM_TRY(error, evoasm_program_x64_emit_io_load_store, program, input, set_io_mapping);
|
|
1051
|
+
}
|
|
1052
|
+
|
|
1053
|
+
evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_DEBUG);
|
|
1054
|
+
|
|
1055
|
+
return true;
|
|
1056
|
+
|
|
1057
|
+
error:
|
|
1058
|
+
return false;
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
|
|
1062
|
+
/* Distance metric used when filling the distance matrix. */
typedef enum {
  /* absolute difference between actual and expected value */
  EVOASM_METRIC_ABSDIFF = 0,
  /* sentinel following the last metric */
  EVOASM_METRIC_NONE = 1
} evoasm_metric;
|
|
1066
|
+
|
|
1067
|
+
static inline void
|
|
1068
|
+
evoasm_program_update_dist_mat(evoasm_program_t *program,
|
|
1069
|
+
evoasm_kernel_t *kernel,
|
|
1070
|
+
evoasm_program_output_t *output,
|
|
1071
|
+
size_t height,
|
|
1072
|
+
size_t tuple_idx,
|
|
1073
|
+
double *dist_mat,
|
|
1074
|
+
evoasm_metric metric) {
|
|
1075
|
+
size_t width = kernel->n_output_regs;
|
|
1076
|
+
evoasm_program_io_val_t *io_vals = output->vals + tuple_idx * output->arity;
|
|
1077
|
+
|
|
1078
|
+
for(size_t i = 0; i < height; i++) {
|
|
1079
|
+
evoasm_program_io_val_t io_val = io_vals[i];
|
|
1080
|
+
evoasm_program_io_val_type_t tuple_type = output->types[i];
|
|
1081
|
+
double io_val_dbl = evoasm_program_io_val_to_dbl(io_val, tuple_type);
|
|
1082
|
+
|
|
1083
|
+
for(size_t j = 0; j < width; j++) {
|
|
1084
|
+
evoasm_program_io_val_t output_val = program->output_vals[tuple_idx * width + j];
|
|
1085
|
+
//uint8_t output_size = program->output_sizes[j];
|
|
1086
|
+
//switch(output_size) {
|
|
1087
|
+
//
|
|
1088
|
+
//}
|
|
1089
|
+
// FIXME: output is essentially just a bitstring and could be anything
|
|
1090
|
+
// an integer (both, signed or unsigned) a float or double.
|
|
1091
|
+
// Moreover, a portion of the output value could
|
|
1092
|
+
// hold the correct answer (e.g. lower 8 or 16 bits etc.).
|
|
1093
|
+
// For now we use the tuple output type and assume signedness.
|
|
1094
|
+
// This needs to be fixed.
|
|
1095
|
+
double output_val_dbl = evoasm_program_io_val_to_dbl(output_val, tuple_type);
|
|
1096
|
+
|
|
1097
|
+
switch(metric) {
|
|
1098
|
+
default:
|
|
1099
|
+
case EVOASM_METRIC_ABSDIFF: {
|
|
1100
|
+
double dist = fabs(output_val_dbl - io_val_dbl);
|
|
1101
|
+
dist_mat[i * width + j] += dist;
|
|
1102
|
+
break;
|
|
1103
|
+
}
|
|
1104
|
+
}
|
|
1105
|
+
}
|
|
1106
|
+
}
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
static void
|
|
1110
|
+
evoasm_program_log_program_output(evoasm_program_t *program,
|
|
1111
|
+
evoasm_kernel_t *kernel,
|
|
1112
|
+
evoasm_program_output_t *output,
|
|
1113
|
+
uint_fast8_t *const matching,
|
|
1114
|
+
evoasm_log_level_t log_level) {
|
|
1115
|
+
|
|
1116
|
+
size_t n_tuples = EVOASM_PROGRAM_OUTPUT_N_TUPLES(output);
|
|
1117
|
+
size_t height = output->arity;
|
|
1118
|
+
size_t width = kernel->n_output_regs;
|
|
1119
|
+
|
|
1120
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, "OUTPUT MATRICES:\n");
|
|
1121
|
+
|
|
1122
|
+
for(size_t i = 0; i < width; i++) {
|
|
1123
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " %d ", kernel->output_regs.x64[i]);
|
|
1124
|
+
}
|
|
1125
|
+
|
|
1126
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
|
|
1127
|
+
|
|
1128
|
+
for(size_t i = 0; i < n_tuples; i++) {
|
|
1129
|
+
for(size_t j = 0; j < height; j++) {
|
|
1130
|
+
for(size_t k = 0; k < width; k++) {
|
|
1131
|
+
bool matched = matching[j] == k;
|
|
1132
|
+
evoasm_program_io_val_t val = program->output_vals[i * width + k];
|
|
1133
|
+
|
|
1134
|
+
if(matched) {
|
|
1135
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[1m ");
|
|
1136
|
+
}
|
|
1137
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " %ld (%f)\t ", val.i64, val.f64);
|
|
1138
|
+
if(matched) {
|
|
1139
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[0m ");
|
|
1140
|
+
}
|
|
1141
|
+
}
|
|
1142
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
|
|
1143
|
+
}
|
|
1144
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
|
|
1145
|
+
}
|
|
1146
|
+
}
|
|
1147
|
+
|
|
1148
|
+
static void
|
|
1149
|
+
evoasm_program_log_dist_dist_mat(evoasm_program_t *program,
|
|
1150
|
+
evoasm_kernel_t *kernel,
|
|
1151
|
+
size_t height,
|
|
1152
|
+
double *dist_mat,
|
|
1153
|
+
uint_fast8_t *matching,
|
|
1154
|
+
evoasm_log_level_t log_level) {
|
|
1155
|
+
|
|
1156
|
+
size_t width = kernel->n_output_regs;
|
|
1157
|
+
|
|
1158
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, "DIST MATRIX: (%zu, %zu)\n", height, width);
|
|
1159
|
+
for(size_t i = 0; i < height; i++) {
|
|
1160
|
+
for(size_t j = 0; j < width; j++) {
|
|
1161
|
+
if(matching[i] == j) {
|
|
1162
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[1m ");
|
|
1163
|
+
}
|
|
1164
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " %.2g\t ", dist_mat[i * width + j]);
|
|
1165
|
+
if(matching[i] == j) {
|
|
1166
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " \x1b[0m ");
|
|
1167
|
+
}
|
|
1168
|
+
}
|
|
1169
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
|
|
1170
|
+
}
|
|
1171
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " \n\n ");
|
|
1172
|
+
}
|
|
1173
|
+
|
|
1174
|
+
|
|
1175
|
+
static inline bool
|
|
1176
|
+
evoasm_program_match(evoasm_program_t *program,
|
|
1177
|
+
size_t width,
|
|
1178
|
+
double *dist_mat,
|
|
1179
|
+
uint_fast8_t *matching) {
|
|
1180
|
+
|
|
1181
|
+
uint_fast8_t best_index = UINT_FAST8_MAX;
|
|
1182
|
+
double best_dist = INFINITY;
|
|
1183
|
+
uint_fast8_t i;
|
|
1184
|
+
|
|
1185
|
+
for(i = 0; i < width; i++) {
|
|
1186
|
+
double v = dist_mat[i];
|
|
1187
|
+
if(v < best_dist) {
|
|
1188
|
+
best_dist = v;
|
|
1189
|
+
best_index = i;
|
|
1190
|
+
}
|
|
1191
|
+
}
|
|
1192
|
+
|
|
1193
|
+
if(evoasm_likely(best_index != UINT_FAST8_MAX)) {
|
|
1194
|
+
*matching = best_index;
|
|
1195
|
+
return true;
|
|
1196
|
+
} else {
|
|
1197
|
+
/*evoasm_program_log_dist_dist_mat(program,
|
|
1198
|
+
1,
|
|
1199
|
+
dist_mat,
|
|
1200
|
+
matching,
|
|
1201
|
+
EVOASM_LOG_LEVEL_WARN);
|
|
1202
|
+
evoasm_assert_not_reached();*/
|
|
1203
|
+
/*
|
|
1204
|
+
* Might happen if all elements are inf or nan
|
|
1205
|
+
*/
|
|
1206
|
+
return false;
|
|
1207
|
+
}
|
|
1208
|
+
}
|
|
1209
|
+
|
|
1210
|
+
static inline void
|
|
1211
|
+
evoasm_program_calc_stable_matching(evoasm_program_t *program,
|
|
1212
|
+
evoasm_kernel_t *kernel,
|
|
1213
|
+
size_t height,
|
|
1214
|
+
double *dist_mat,
|
|
1215
|
+
uint_fast8_t *matching) {
|
|
1216
|
+
|
|
1217
|
+
uint_fast8_t width = (uint_fast8_t) kernel->n_output_regs;
|
|
1218
|
+
uint_fast8_t *inv_matching = evoasm_alloca(width * sizeof(uint_fast8_t));
|
|
1219
|
+
uint_fast8_t i;
|
|
1220
|
+
|
|
1221
|
+
// calculates a stable matching
|
|
1222
|
+
for(i = 0; i < height; i++) {
|
|
1223
|
+
matching[i] = UINT_FAST8_MAX;
|
|
1224
|
+
}
|
|
1225
|
+
|
|
1226
|
+
for(i = 0; i < width; i++) {
|
|
1227
|
+
inv_matching[i] = UINT_FAST8_MAX;
|
|
1228
|
+
}
|
|
1229
|
+
|
|
1230
|
+
while(true) {
|
|
1231
|
+
uint_fast8_t unmatched_index = UINT_FAST8_MAX;
|
|
1232
|
+
uint_fast8_t best_index = UINT_FAST8_MAX;
|
|
1233
|
+
double best_dist = INFINITY;
|
|
1234
|
+
|
|
1235
|
+
for(i = 0; i < height; i++) {
|
|
1236
|
+
if(matching[i] == UINT_FAST8_MAX) {
|
|
1237
|
+
unmatched_index = i;
|
|
1238
|
+
break;
|
|
1239
|
+
}
|
|
1240
|
+
}
|
|
1241
|
+
|
|
1242
|
+
if(unmatched_index == UINT_FAST8_MAX) {
|
|
1243
|
+
break;
|
|
1244
|
+
}
|
|
1245
|
+
|
|
1246
|
+
for(i = 0; i < width; i++) {
|
|
1247
|
+
double v = dist_mat[unmatched_index * width + i];
|
|
1248
|
+
if(v < best_dist) {
|
|
1249
|
+
best_dist = v;
|
|
1250
|
+
best_index = i;
|
|
1251
|
+
}
|
|
1252
|
+
}
|
|
1253
|
+
|
|
1254
|
+
if(evoasm_likely(best_index != UINT_FAST8_MAX)) {
|
|
1255
|
+
if(inv_matching[best_index] == UINT_FAST8_MAX) {
|
|
1256
|
+
inv_matching[best_index] = unmatched_index;
|
|
1257
|
+
matching[unmatched_index] = best_index;
|
|
1258
|
+
} else {
|
|
1259
|
+
if(dist_mat[inv_matching[best_index] * width + best_index] > best_dist) {
|
|
1260
|
+
matching[inv_matching[best_index]] = UINT_FAST8_MAX;
|
|
1261
|
+
inv_matching[best_index] = unmatched_index;
|
|
1262
|
+
matching[unmatched_index] = best_index;
|
|
1263
|
+
} else {
|
|
1264
|
+
//dist_mat[unmatched_index * width + i] = copysign(best_dist, -1.0);
|
|
1265
|
+
dist_mat[unmatched_index * width + i] = INFINITY;
|
|
1266
|
+
}
|
|
1267
|
+
}
|
|
1268
|
+
} else {
|
|
1269
|
+
evoasm_program_log_dist_dist_mat(program,
|
|
1270
|
+
kernel,
|
|
1271
|
+
height,
|
|
1272
|
+
dist_mat,
|
|
1273
|
+
matching,
|
|
1274
|
+
EVOASM_LOG_LEVEL_DEBUG);
|
|
1275
|
+
evoasm_assert_not_reached();
|
|
1276
|
+
}
|
|
1277
|
+
}
|
|
1278
|
+
}
|
|
1279
|
+
|
|
1280
|
+
|
|
1281
|
+
static inline evoasm_loss_t
|
|
1282
|
+
evoasm_program_calc_loss(evoasm_program_t *program,
|
|
1283
|
+
evoasm_kernel_t *kernel,
|
|
1284
|
+
size_t height,
|
|
1285
|
+
double *dist_mat,
|
|
1286
|
+
uint_fast8_t *matching) {
|
|
1287
|
+
size_t width = kernel->n_output_regs;
|
|
1288
|
+
double scale = 1.0 / (double) width;
|
|
1289
|
+
double loss = 0.0;
|
|
1290
|
+
|
|
1291
|
+
for(size_t i = 0; i < height; i++) {
|
|
1292
|
+
loss += (scale * dist_mat[i * width + matching[i]]);
|
|
1293
|
+
}
|
|
1294
|
+
|
|
1295
|
+
return (evoasm_loss_t) loss;
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
|
|
1299
|
+
static evoasm_loss_t
|
|
1300
|
+
evoasm_program_assess(evoasm_program_t *program,
|
|
1301
|
+
evoasm_program_output_t *output) {
|
|
1302
|
+
|
|
1303
|
+
size_t n_tuples = EVOASM_PROGRAM_OUTPUT_N_TUPLES(output);
|
|
1304
|
+
size_t height = output->arity;
|
|
1305
|
+
evoasm_kernel_t *kernel = &program->kernels[program->size - 1];
|
|
1306
|
+
size_t width = kernel->n_output_regs;
|
|
1307
|
+
size_t dist_mat_len = (size_t) (width * height);
|
|
1308
|
+
double *dist_mat = evoasm_alloca(dist_mat_len * sizeof(double));
|
|
1309
|
+
uint_fast8_t *matching = evoasm_alloca(height * sizeof(uint_fast8_t));
|
|
1310
|
+
evoasm_loss_t loss;
|
|
1311
|
+
|
|
1312
|
+
for(size_t i = 0; i < dist_mat_len; i++) {
|
|
1313
|
+
dist_mat[i] = 0.0;
|
|
1314
|
+
}
|
|
1315
|
+
|
|
1316
|
+
if(height == 1) {
|
|
1317
|
+
/* COMMON FAST-PATH */
|
|
1318
|
+
for(size_t i = 0; i < n_tuples; i++) {
|
|
1319
|
+
evoasm_program_update_dist_mat(program, kernel, output, 1, i, dist_mat, EVOASM_METRIC_ABSDIFF);
|
|
1320
|
+
}
|
|
1321
|
+
|
|
1322
|
+
if(evoasm_program_match(program, width, dist_mat, matching)) {
|
|
1323
|
+
loss = evoasm_program_calc_loss(program, kernel, 1, dist_mat, matching);
|
|
1324
|
+
} else {
|
|
1325
|
+
loss = INFINITY;
|
|
1326
|
+
}
|
|
1327
|
+
} else {
|
|
1328
|
+
for(size_t i = 0; i < n_tuples; i++) {
|
|
1329
|
+
evoasm_program_update_dist_mat(program, kernel, output, height, i, dist_mat, EVOASM_METRIC_ABSDIFF);
|
|
1330
|
+
}
|
|
1331
|
+
|
|
1332
|
+
evoasm_program_calc_stable_matching(program, kernel, height, dist_mat, matching);
|
|
1333
|
+
loss = evoasm_program_calc_loss(program, kernel, height, dist_mat, matching);
|
|
1334
|
+
}
|
|
1335
|
+
|
|
1336
|
+
|
|
1337
|
+
for(size_t i = 0; i < height; i++) {
|
|
1338
|
+
switch(program->arch_info->id) {
|
|
1339
|
+
case EVOASM_ARCH_X64: {
|
|
1340
|
+
program->output_regs[i] = kernel->output_regs.x64[matching[i]];
|
|
1341
|
+
break;
|
|
1342
|
+
}
|
|
1343
|
+
default:
|
|
1344
|
+
evoasm_assert_not_reached();
|
|
1345
|
+
}
|
|
1346
|
+
}
|
|
1347
|
+
|
|
1348
|
+
#if EVOASM_LOG_LEVEL <= EVOASM_LOG_LEVEL_DEBUG
|
|
1349
|
+
if(loss == 0.0) {
|
|
1350
|
+
evoasm_program_log_program_output(program,
|
|
1351
|
+
kernel,
|
|
1352
|
+
output,
|
|
1353
|
+
matching,
|
|
1354
|
+
EVOASM_LOG_LEVEL_DEBUG);
|
|
1355
|
+
}
|
|
1356
|
+
#endif
|
|
1357
|
+
|
|
1358
|
+
return loss;
|
|
1359
|
+
}
|
|
1360
|
+
|
|
1361
|
+
static void
|
|
1362
|
+
evoasm_program_reset_recur_counters(evoasm_program_t *program) {
|
|
1363
|
+
memset(program->recur_counters, 0, sizeof(program->recur_counters[0]) * program->size);
|
|
1364
|
+
}
|
|
1365
|
+
|
|
1366
|
+
static inline evoasm_loss_t
|
|
1367
|
+
evoasm_program_eval_(evoasm_program_t *program,
|
|
1368
|
+
evoasm_program_output_t *output) {
|
|
1369
|
+
|
|
1370
|
+
evoasm_kernel_t *last_kernel = &program->kernels[program->size - 1];
|
|
1371
|
+
evoasm_loss_t loss;
|
|
1372
|
+
|
|
1373
|
+
if(evoasm_unlikely(last_kernel->n_output_regs == 0)) {
|
|
1374
|
+
evoasm_log_info("program %p has no output", (void *) program);
|
|
1375
|
+
return INFINITY;
|
|
1376
|
+
}
|
|
1377
|
+
|
|
1378
|
+
evoasm_program_reset_recur_counters(program);
|
|
1379
|
+
|
|
1380
|
+
evoasm_signal_set_exception_mask(program->exception_mask);
|
|
1381
|
+
|
|
1382
|
+
#ifdef EVOASM_ENABLE_PARANOID_MODE
|
|
1383
|
+
for(size_t i = 0; i < program->size; i++) {
|
|
1384
|
+
evoasm_kernel_t *kernel = &program->kernels[i];
|
|
1385
|
+
for(size_t j = 0; j < EVOASM_X64_REG_NONE; j++) {
|
|
1386
|
+
kernel->rand_vals[j].i64 = rand() | (rand() << (rand() % 24));
|
|
1387
|
+
}
|
|
1388
|
+
}
|
|
1389
|
+
#endif
|
|
1390
|
+
|
|
1391
|
+
if(EVOASM_SIGNAL_TRY()) {
|
|
1392
|
+
evoasm_buf_exec(program->buf);
|
|
1393
|
+
loss = evoasm_program_assess(program, output);
|
|
1394
|
+
} else {
|
|
1395
|
+
evoasm_log_debug("program %p signaled", (void *) program);
|
|
1396
|
+
loss = INFINITY;
|
|
1397
|
+
}
|
|
1398
|
+
|
|
1399
|
+
evoasm_signal_clear_exception_mask();
|
|
1400
|
+
|
|
1401
|
+
return loss;
|
|
1402
|
+
}
|
|
1403
|
+
|
|
1404
|
+
evoasm_loss_t
|
|
1405
|
+
evoasm_program_eval(evoasm_program_t *program,
|
|
1406
|
+
evoasm_program_output_t *output) {
|
|
1407
|
+
|
|
1408
|
+
evoasm_loss_t loss = evoasm_program_eval_(program, output);
|
|
1409
|
+
|
|
1410
|
+
#ifdef EVOASM_ENABLE_PARANOID_MODE
|
|
1411
|
+
for(size_t i = 0; i < 10; i++) {
|
|
1412
|
+
evoasm_loss_t loss_ = evoasm_program_eval_(program, output);
|
|
1413
|
+
|
|
1414
|
+
if(loss_ != loss) {
|
|
1415
|
+
evoasm_program_log(program, EVOASM_LOG_LEVEL_WARN);
|
|
1416
|
+
evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_WARN);
|
|
1417
|
+
}
|
|
1418
|
+
assert(loss_ == loss);
|
|
1419
|
+
}
|
|
1420
|
+
#endif
|
|
1421
|
+
|
|
1422
|
+
return loss;
|
|
1423
|
+
}
|
|
1424
|
+
|
|
1425
|
+
static evoasm_program_output_t *
|
|
1426
|
+
evoasm_program_load_output(evoasm_program_t *program,
|
|
1427
|
+
evoasm_kernel_t *kernel,
|
|
1428
|
+
evoasm_program_input_t *input) {
|
|
1429
|
+
|
|
1430
|
+
size_t width = kernel->n_output_regs;
|
|
1431
|
+
evoasm_program_output_t *output = &program->_output;
|
|
1432
|
+
size_t height = output->arity;
|
|
1433
|
+
size_t n_tuples = EVOASM_PROGRAM_INPUT_N_TUPLES(input);
|
|
1434
|
+
uint_fast8_t *matching = evoasm_alloca(height * sizeof(uint_fast8_t));
|
|
1435
|
+
|
|
1436
|
+
evoasm_program_output_t *load_output = evoasm_program_io_alloc(
|
|
1437
|
+
(uint16_t) (EVOASM_PROGRAM_INPUT_N_TUPLES(input) * height));
|
|
1438
|
+
|
|
1439
|
+
for(size_t i = 0; i < height; i++) {
|
|
1440
|
+
for(size_t j = 0; j < kernel->n_output_regs; j++) {
|
|
1441
|
+
if(program->output_regs[i] == kernel->output_regs.x64[j]) {
|
|
1442
|
+
matching[i] = (uint_fast8_t) j;
|
|
1443
|
+
goto next;
|
|
1444
|
+
}
|
|
1445
|
+
}
|
|
1446
|
+
evoasm_log_fatal("program output reg %d not found in kernel output regs", program->output_regs[i]);
|
|
1447
|
+
evoasm_assert_not_reached();
|
|
1448
|
+
next:;
|
|
1449
|
+
}
|
|
1450
|
+
|
|
1451
|
+
for(size_t i = 0; i < n_tuples; i++) {
|
|
1452
|
+
for(size_t j = 0; j < height; j++) {
|
|
1453
|
+
load_output->vals[i * height + j] = program->output_vals[i * width + matching[j]];
|
|
1454
|
+
}
|
|
1455
|
+
}
|
|
1456
|
+
|
|
1457
|
+
load_output->arity = output->arity;
|
|
1458
|
+
memcpy(load_output->types, output->types, EVOASM_ARY_LEN(output->types));
|
|
1459
|
+
|
|
1460
|
+
#if EVOASM_LOG_LEVEL <= EVOASM_LOG_LEVEL_DEBUG
|
|
1461
|
+
evoasm_program_log_program_output(program,
|
|
1462
|
+
kernel,
|
|
1463
|
+
load_output,
|
|
1464
|
+
matching,
|
|
1465
|
+
EVOASM_LOG_LEVEL_DEBUG);
|
|
1466
|
+
#endif
|
|
1467
|
+
return load_output;
|
|
1468
|
+
}
|
|
1469
|
+
|
|
1470
|
+
/*
 * Runs the program on caller-supplied `input` and returns the produced
 * output.
 *
 * The input must agree with the input description stored in the program
 * (program->_input) in arity and in every value type, and must not contain
 * more tuples than program->max_tuples; otherwise an error is recorded via
 * evoasm_error() and NULL is returned.
 *
 * Returns the object built by evoasm_program_load_output(), or NULL if
 * validation or emitting fails, or if the EVOASM_SIGNAL_TRY() guard takes
 * its else branch (logged as "signaled").
 */
evoasm_program_output_t *
evoasm_program_run(evoasm_program_t *program,
                   evoasm_program_input_t *input) {
  /* outputs are read from the last kernel of the program */
  evoasm_kernel_t *kernel = &program->kernels[program->size - 1];
  evoasm_program_output_t *output;

  if(input->arity != program->_input.arity) {
    evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
                 "arity mismatch (%d for %d)", input->arity, program->_input.arity);
    return NULL;
  }

  size_t n_tuples = EVOASM_PROGRAM_INPUT_N_TUPLES(input);
  if(n_tuples > program->max_tuples) {
    evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
                 "Maximum number of input/output tuples exceeded (%zu > %d)", n_tuples, program->max_tuples);
    return NULL;
  }

  for(size_t i = 0; i < input->arity; i++) {
    if(input->types[i] != program->_input.types[i]) {
      evoasm_error(EVOASM_ERROR_TYPE_PROGRAM, EVOASM_ERROR_CODE_NONE,
                   "type mismatch (%d != %d)", input->types[i], program->_input.types[i]);
      return NULL;
    }
  }

  /* re-emit only the input load / output store code for the new input */
  evoasm_program_emit_flags_t emit_flags = EVOASM_PROGRAM_EMIT_FLAG_EMIT_IO_LOAD_STORE;
  if(!evoasm_program_emit(program, input, emit_flags)) {
    return NULL;
  }

  evoasm_buf_log(program->buf, EVOASM_LOG_LEVEL_DEBUG);
  evoasm_signal_set_exception_mask(program->exception_mask);

  /* switch the code buffer to the RX protection mode before executing it */
  if(!evoasm_buf_protect(program->buf, EVOASM_MPROT_MODE_RX)) {
    evoasm_assert_not_reached();
  }

  evoasm_program_reset_recur_counters(program);

  if(EVOASM_SIGNAL_TRY()) {
    evoasm_buf_exec(program->buf);
    output = evoasm_program_load_output(program,
                                        kernel,
                                        input);
  } else {
    /* NOTE(review): presumably reached when the executed code raised one of
     * the masked exceptions — confirm against EVOASM_SIGNAL_TRY */
    evoasm_log_debug("signaled\n");
    output = NULL;
  }

  /* switch the code buffer to the RW protection mode again */
  if(!evoasm_buf_protect(program->buf, EVOASM_MPROT_MODE_RW)) {
    evoasm_assert_not_reached();
  }

  evoasm_signal_clear_exception_mask();

  return output;
}
|
|
1529
|
+
|
|
1530
|
+
evoasm_success_t
|
|
1531
|
+
evoasm_program_emit(evoasm_program_t *program,
|
|
1532
|
+
evoasm_program_input_t *input,
|
|
1533
|
+
evoasm_program_emit_flags_t emit_flags) {
|
|
1534
|
+
switch(program->arch_info->id) {
|
|
1535
|
+
case EVOASM_ARCH_X64: {
|
|
1536
|
+
return evoasm_program_x64_emit(program, input,
|
|
1537
|
+
emit_flags);
|
|
1538
|
+
break;
|
|
1539
|
+
}
|
|
1540
|
+
default:
|
|
1541
|
+
evoasm_assert_not_reached();
|
|
1542
|
+
}
|
|
1543
|
+
}
|
|
1544
|
+
|
|
1545
|
+
static size_t
|
|
1546
|
+
evoasm_program_x64_find_writers_(evoasm_program_t *program, evoasm_kernel_t *kernel, evoasm_reg_id_t reg_id,
|
|
1547
|
+
size_t idx, size_t *writers) {
|
|
1548
|
+
size_t len = 0;
|
|
1549
|
+
for(int i = (int) idx; i >= 0; i--) {
|
|
1550
|
+
evoasm_x64_reg_liveness_t reg_liveness;
|
|
1551
|
+
evoasm_x64_reg_liveness_init(®_liveness);
|
|
1552
|
+
|
|
1553
|
+
if(evoasm_kernel_is_writing_inst_x64(kernel, (size_t) i, reg_id, ®_liveness)) {
|
|
1554
|
+
writers[len++] = (size_t) i;
|
|
1555
|
+
}
|
|
1556
|
+
}
|
|
1557
|
+
return len;
|
|
1558
|
+
}
|
|
1559
|
+
|
|
1560
|
+
static size_t
|
|
1561
|
+
evoasm_program_x64_find_writers(evoasm_program_t *program, evoasm_kernel_t *kernel,
|
|
1562
|
+
evoasm_reg_id_t reg_id, size_t idx, size_t *writers) {
|
|
1563
|
+
|
|
1564
|
+
return evoasm_program_x64_find_writers_(program, kernel, reg_id, idx, writers);
|
|
1565
|
+
}
|
|
1566
|
+
|
|
1567
|
+
/* Working state of the mark phase of evoasm_program_eliminate_introns(). */
typedef struct {
  /* set whenever an instruction gets newly marked; the mark loop repeats
   * until a full pass leaves this false */
  bool change;
  /* per kernel: bit n is set if instruction n is marked; only marked
   * instructions are copied in the sweep */
  evoasm_bitmap1024_t inst_bitmaps[EVOASM_PROGRAM_MAX_SIZE];
  /* per kernel: bit r is set if the writers of register r at the end of the
   * kernel have to be marked */
  evoasm_bitmap256_t output_reg_bitmaps[EVOASM_PROGRAM_MAX_SIZE];
} evoasm_program_intron_elimination_ctx;
|
|
1572
|
+
|
|
1573
|
+
static void
|
|
1574
|
+
evoasm_program_x64_mark_writers(evoasm_program_t *program, evoasm_kernel_t *kernel,
|
|
1575
|
+
evoasm_reg_id_t reg_id, size_t idx, evoasm_program_intron_elimination_ctx *ctx) {
|
|
1576
|
+
size_t writers[16];
|
|
1577
|
+
|
|
1578
|
+
size_t writers_len = evoasm_program_x64_find_writers(program, kernel, reg_id, idx, writers);
|
|
1579
|
+
|
|
1580
|
+
if(writers_len > 0) {
|
|
1581
|
+
for(size_t i = 0; i < writers_len; i++) {
|
|
1582
|
+
size_t writer_idx = writers[i];
|
|
1583
|
+
evoasm_bitmap_t *inst_bitmap = (evoasm_bitmap_t *) &ctx->inst_bitmaps[kernel->idx];
|
|
1584
|
+
if(evoasm_bitmap_get(inst_bitmap, writer_idx)) continue;
|
|
1585
|
+
|
|
1586
|
+
evoasm_x64_inst_t *x64_inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[writer_idx]);
|
|
1587
|
+
evoasm_bitmap_set(inst_bitmap, writer_idx);
|
|
1588
|
+
ctx->change = true;
|
|
1589
|
+
|
|
1590
|
+
for(size_t j = 0; j < x64_inst->n_operands; j++) {
|
|
1591
|
+
evoasm_x64_operand_t *op = &x64_inst->operands[j];
|
|
1592
|
+
evoasm_x64_reg_id_t op_reg_id = evoasm_kernel_get_operand_reg_id_x64(kernel, op, (uint16_t) writer_idx);
|
|
1593
|
+
|
|
1594
|
+
if(op->read) {
|
|
1595
|
+
if(writer_idx > 0) {
|
|
1596
|
+
evoasm_program_x64_mark_writers(program, kernel, op_reg_id, writer_idx - 1u, ctx);
|
|
1597
|
+
}
|
|
1598
|
+
|
|
1599
|
+
if(kernel->reg_info.x64.regs[op_reg_id].input) {
|
|
1600
|
+
for(int k = kernel->idx - 1; k >= 0; k--) {
|
|
1601
|
+
size_t trans_idx = SIZE_MAX;
|
|
1602
|
+
if(k + 1 == kernel->idx) {
|
|
1603
|
+
trans_idx = 0;
|
|
1604
|
+
} else if(evoasm_program_branch_kernel_idx(program, (size_t) k) == kernel->idx) {
|
|
1605
|
+
trans_idx = 1;
|
|
1606
|
+
};
|
|
1607
|
+
|
|
1608
|
+
if(trans_idx != SIZE_MAX) {
|
|
1609
|
+
evoasm_kernel_t *trans_kernel = &program->kernels[k];
|
|
1610
|
+
|
|
1611
|
+
for(size_t l = 0; l < EVOASM_X64_REG_NONE; l++) {
|
|
1612
|
+
if(trans_kernel->reg_info.x64.trans_regs[trans_idx][op_reg_id] == l) {
|
|
1613
|
+
evoasm_bitmap_set((evoasm_bitmap_t *) &ctx->output_reg_bitmaps[k], l);
|
|
1614
|
+
}
|
|
1615
|
+
}
|
|
1616
|
+
}
|
|
1617
|
+
}
|
|
1618
|
+
}
|
|
1619
|
+
}
|
|
1620
|
+
}
|
|
1621
|
+
}
|
|
1622
|
+
}
|
|
1623
|
+
}
|
|
1624
|
+
|
|
1625
|
+
static void
|
|
1626
|
+
evoasm_program_mark_writers(evoasm_program_t *program, evoasm_kernel_t *kernel,
|
|
1627
|
+
evoasm_reg_id_t reg_id, size_t index, evoasm_program_intron_elimination_ctx *ctx) {
|
|
1628
|
+
switch(program->arch_info->id) {
|
|
1629
|
+
case EVOASM_ARCH_X64: {
|
|
1630
|
+
evoasm_program_x64_mark_writers(program, kernel, reg_id, index, ctx);
|
|
1631
|
+
break;
|
|
1632
|
+
}
|
|
1633
|
+
default:
|
|
1634
|
+
evoasm_assert_not_reached();
|
|
1635
|
+
}
|
|
1636
|
+
}
|
|
1637
|
+
|
|
1638
|
+
static evoasm_success_t
|
|
1639
|
+
evoasm_program_mark_kernel(evoasm_program_t *program, evoasm_kernel_t *kernel,
|
|
1640
|
+
evoasm_program_intron_elimination_ctx *ctx) {
|
|
1641
|
+
for(size_t i = 0; i < EVOASM_X64_REG_NONE; i++) {
|
|
1642
|
+
evoasm_bitmap_t *bitmap = (evoasm_bitmap_t *) &ctx->output_reg_bitmaps[kernel->idx];
|
|
1643
|
+
if(evoasm_bitmap_get(bitmap, i)) {
|
|
1644
|
+
evoasm_program_mark_writers(program, kernel, (evoasm_reg_id_t) i, (size_t) (kernel->size - 1),
|
|
1645
|
+
ctx);
|
|
1646
|
+
}
|
|
1647
|
+
}
|
|
1648
|
+
|
|
1649
|
+
return true;
|
|
1650
|
+
}
|
|
1651
|
+
|
|
1652
|
+
/*
 * Builds in `dst_program` a copy of `program` that contains only the
 * instructions contributing to the program's output registers
 * (mark and sweep).
 *
 * Mark:  starting from program->output_regs in the last kernel, the writers
 *        of every needed register are marked, kernel by kernel from last to
 *        first, until a full pass marks nothing new.
 * Sweep: the marked instructions and their parameters are copied, in order,
 *        into the kernels of dst_program, which is then emitted.
 *
 * dst_program is initialized by this function (evoasm_program_init()).
 *
 * Returns true on success, false if initializing, marking or emitting
 * fails.
 */
evoasm_success_t
evoasm_program_eliminate_introns(evoasm_program_t *program, evoasm_program_t *dst_program) {
  size_t last_kernel_idx = (size_t) (program->size - 1);
  evoasm_program_intron_elimination_ctx ctx = {0};

  //evoasm_kernel_t *last_kernel = &program->kernels[last_kernel_idx];

  /* dst gets the same dimensions as the source; kernel size is taken from
   * the first kernel */
  EVOASM_TRY(error, evoasm_program_init,
             dst_program,
             program->arch_info,
             program->size,
             program->kernels[0].size,
             program->max_tuples,
             program->recur_limit,
             false);

  /* seed: the program's output registers are needed at the end of the last kernel */
  evoasm_bitmap_t *output_bitmap = (evoasm_bitmap_t *) &ctx.output_reg_bitmaps[last_kernel_idx];
  for(size_t i = 0; i < program->_output.arity; i++) {
    evoasm_bitmap_set(output_bitmap, program->output_regs[i]);
  }

  /* mark: iterate to a fixed point */
  do {
    ctx.change = false;
    for(int i = (int) last_kernel_idx; i >= 0; i--) {
      EVOASM_TRY(error, evoasm_program_mark_kernel, program,
                 &program->kernels[i], &ctx);
    }
  } while(ctx.change);

  /* sweep */
  for(size_t i = 0; i <= last_kernel_idx; i++) {
    evoasm_kernel_t *kernel = &program->kernels[i];
    evoasm_kernel_t *dst_kernel = &dst_program->kernels[i];
    evoasm_bitmap_t *inst_bitmap = (evoasm_bitmap_t *) &ctx.inst_bitmaps[i];

    /* k counts the instructions kept so far */
    size_t k = 0;
    for(size_t j = 0; j < kernel->size; j++) {
      if(evoasm_bitmap_get(inst_bitmap, j)) {
        dst_kernel->insts[k] = kernel->insts[j];
        dst_kernel->params.x64[k] = kernel->params.x64[j];
        k++;
      }
    }

    if(dst_kernel != kernel) {
      dst_kernel->size = (uint16_t) k;
      dst_kernel->reg_info = kernel->reg_info;
      dst_kernel->output_regs = kernel->output_regs;
      dst_kernel->n_input_regs = kernel->n_input_regs;
      dst_kernel->n_output_regs = kernel->n_output_regs;
    }
  }

  if(dst_program != program) {
    dst_program->_input = program->_input;
    dst_program->_output = program->_output;
    memcpy(dst_program->output_regs, program->output_regs, sizeof(program->output_regs));
    EVOASM_MEMCPY_N(dst_program->jmp_offs, program->jmp_offs, program->size);
    EVOASM_MEMCPY_N(dst_program->jmp_conds, program->jmp_conds, program->size);
  }

  evoasm_program_emit_flags_t emit_flags =
      EVOASM_PROGRAM_EMIT_FLAG_PREPARE |
      EVOASM_PROGRAM_EMIT_FLAG_EMIT_KERNELS;

  /* no input is passed here; the flags do not request IO load/store code */
  EVOASM_TRY(error, evoasm_program_emit, dst_program, NULL, emit_flags);

  return true;
error:
  /* NOTE(review): dst_program is not destroyed on this path — confirm that
   * callers clean it up */
  return false;
}
|
|
1723
|
+
|
|
1724
|
+
|
|
1725
|
+
/* Buffer space evoasm_program_init() adds per tuple on top of the body size
 * (presumably bytes reserved for prolog and epilog code). */
#define EVOASM_PROGRAM_PROLOG_EPILOG_SIZE UINT32_C(1024)
/* Buffer space evoasm_program_init() reserves for each of the
 * program_size - 1 kernel-to-kernel transitions. */
#define EVOASM_PROGRAM_TRANSITION_SIZE UINT32_C(512)
|
|
1727
|
+
|
|
1728
|
+
|
|
1729
|
+
/*
 * Initializes `program` for `program_size` kernels of `kernel_size`
 * instructions each.
 *
 * The program is first reset to all zeros. Then the executable buffer
 * (mmap-backed, protected RWX), the body buffer (malloc-backed) and the
 * zero-initialized arrays for output values, kernels, recursion counters,
 * jump conditions and jump offsets are allocated.
 *
 * program_size: number of kernels.
 * kernel_size:  number of instructions per kernel.
 * max_tuples:   maximum number of input/output tuples; scales the
 *               executable buffer and the output value array.
 * recur_limit:  stored in program->recur_limit.
 * shallow:      if true, no per-kernel instruction and parameter arrays
 *               are allocated.
 *
 * Returns true on success. On failure evoasm_program_destroy() is called
 * on the partially initialized program and false is returned.
 */
evoasm_success_t
evoasm_program_init(evoasm_program_t *program,
                    evoasm_arch_info_t *arch_info,
                    size_t program_size,
                    size_t kernel_size,
                    size_t max_tuples,
                    size_t recur_limit,
                    bool shallow) {

  static evoasm_program_t zero_program = {0};
  size_t n_transitions = program_size - 1u;

  /* start from a clean slate so the error path only sees pointers that are
   * either NULL or were set below */
  *program = zero_program;
  program->arch_info = arch_info;
  program->recur_limit = (uint32_t) recur_limit;
  program->shallow = shallow;
  program->size = (uint16_t) program_size;
  program->max_tuples = (uint16_t) max_tuples;

  /* room for all transitions plus every instruction at its maximum length */
  size_t body_buf_size =
      (size_t) (n_transitions * EVOASM_PROGRAM_TRANSITION_SIZE
                + program_size * kernel_size * program->arch_info->max_inst_len);

  /* sized for one copy of body plus prolog/epilog per tuple */
  size_t buf_size = max_tuples * (body_buf_size + EVOASM_PROGRAM_PROLOG_EPILOG_SIZE);

  EVOASM_TRY(error, evoasm_buf_init, &program->_buf, EVOASM_BUF_TYPE_MMAP, buf_size);
  program->buf = &program->_buf;

  EVOASM_TRY(error, evoasm_buf_init, &program->_body_buf, EVOASM_BUF_TYPE_MALLOC, body_buf_size);
  program->body_buf = &program->_body_buf;

  EVOASM_TRY(error, evoasm_buf_protect, &program->_buf,
             EVOASM_MPROT_MODE_RWX);

  /* one slot per tuple and possible output register */
  size_t output_vals_len = max_tuples * EVOASM_KERNEL_MAX_OUTPUT_REGS;

  EVOASM_TRY_ALLOC(error, calloc, program->output_vals, output_vals_len, sizeof(evoasm_program_io_val_t));
  EVOASM_TRY_ALLOC(error, calloc, program->kernels, program_size, sizeof(evoasm_kernel_t));
  EVOASM_TRY_ALLOC(error, calloc, program->recur_counters, program_size, sizeof(uint32_t));
  EVOASM_TRY_ALLOC(error, calloc, program->jmp_conds, program_size, sizeof(uint8_t));
  EVOASM_TRY_ALLOC(error, calloc, program->jmp_offs, program_size, sizeof(int16_t));

  for(uint16_t i = 0; i < program_size; i++) {
    evoasm_kernel_t *kernel = &program->kernels[i];

    kernel->idx = i;
    kernel->size = (uint16_t) kernel_size;

    if(!shallow) {
      EVOASM_TRY_ALLOC(error, calloc, kernel->insts, kernel_size, sizeof(kernel->insts[0]));
      switch(program->arch_info->id) {
        case EVOASM_ARCH_X64: {
          EVOASM_TRY_ALLOC(error, calloc, kernel->params.x64, kernel_size, sizeof(kernel->params.x64[0]));
          break;
        }
        default:
          evoasm_assert_not_reached();
      }
    }
  }


  return true;

error:
  EVOASM_TRY_WARN(evoasm_program_destroy, program);
  return false;
}
|
|
1797
|
+
|
|
1798
|
+
void
|
|
1799
|
+
evoasm_kernel_log(evoasm_kernel_t *kernel, evoasm_arch_id_t arch_id, evoasm_log_level_t log_level) {
|
|
1800
|
+
if(_evoasm_log_level > log_level) return;
|
|
1801
|
+
|
|
1802
|
+
switch(arch_id) {
|
|
1803
|
+
case EVOASM_ARCH_X64:
|
|
1804
|
+
for(size_t i = 0; i < kernel->size; i++) {
|
|
1805
|
+
evoasm_x64_inst_t *inst = evoasm_x64_inst_((evoasm_x64_inst_id_t) kernel->insts[i]);
|
|
1806
|
+
const char *mnem = evoasm_x64_inst_get_mnem(inst);
|
|
1807
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, "%s", mnem);
|
|
1808
|
+
}
|
|
1809
|
+
break;
|
|
1810
|
+
default:
|
|
1811
|
+
evoasm_assert_not_reached();
|
|
1812
|
+
}
|
|
1813
|
+
}
|
|
1814
|
+
|
|
1815
|
+
void
|
|
1816
|
+
evoasm_program_log(evoasm_program_t *program, evoasm_log_level_t log_level) {
|
|
1817
|
+
if(_evoasm_log_level > log_level) return;
|
|
1818
|
+
|
|
1819
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, "Evoasm::Program: size: %d", program->size);
|
|
1820
|
+
|
|
1821
|
+
for(size_t i = 0; i < program->size; i++) {
|
|
1822
|
+
evoasm_kernel_log(&program->kernels[i], (evoasm_arch_id_t) program->arch_info->id, log_level);
|
|
1823
|
+
}
|
|
1824
|
+
evoasm_log(log_level, EVOASM_LOG_TAG, " \n ");
|
|
1825
|
+
}
|
|
1826
|
+
|
|
1827
|
+
/* NOTE(review): presumably expands to the definitions of the allocation and
 * free functions for evoasm_program_t — confirm against the macro's
 * definition, which is not part of this file section. */
EVOASM_DEF_ALLOC_FREE_FUNCS(program)
|