llrb 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.gitmodules +4 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +5 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +56 -0
  8. data/README.md +311 -0
  9. data/Rakefile +30 -0
  10. data/bin/bm_app_fib +41 -0
  11. data/bin/bm_empty_method +33 -0
  12. data/bin/bm_loop_while +27 -0
  13. data/bin/bm_plus +33 -0
  14. data/bin/console +14 -0
  15. data/bin/loop_while.rb +5 -0
  16. data/bin/setup +8 -0
  17. data/ext/llrb/cfg.h +124 -0
  18. data/ext/llrb/compiler.c +987 -0
  19. data/ext/llrb/compiler/funcs.h +164 -0
  20. data/ext/llrb/compiler/stack.h +43 -0
  21. data/ext/llrb/cruby.h +42 -0
  22. data/ext/llrb/cruby/ccan/build_assert/build_assert.h +40 -0
  23. data/ext/llrb/cruby/ccan/check_type/check_type.h +63 -0
  24. data/ext/llrb/cruby/ccan/container_of/container_of.h +142 -0
  25. data/ext/llrb/cruby/ccan/list/list.h +773 -0
  26. data/ext/llrb/cruby/ccan/str/str.h +16 -0
  27. data/ext/llrb/cruby/internal.h +1774 -0
  28. data/ext/llrb/cruby/iseq.h +252 -0
  29. data/ext/llrb/cruby/method.h +213 -0
  30. data/ext/llrb/cruby/node.h +520 -0
  31. data/ext/llrb/cruby/probes_helper.h +43 -0
  32. data/ext/llrb/cruby/ruby_assert.h +54 -0
  33. data/ext/llrb/cruby/ruby_atomic.h +233 -0
  34. data/ext/llrb/cruby/thread_pthread.h +54 -0
  35. data/ext/llrb/cruby/vm_core.h +1646 -0
  36. data/ext/llrb/cruby/vm_debug.h +37 -0
  37. data/ext/llrb/cruby/vm_exec.h +182 -0
  38. data/ext/llrb/cruby/vm_opts.h +57 -0
  39. data/ext/llrb/cruby_extra/id.h +220 -0
  40. data/ext/llrb/cruby_extra/insns.inc +113 -0
  41. data/ext/llrb/cruby_extra/insns_info.inc +796 -0
  42. data/ext/llrb/cruby_extra/probes.h +80 -0
  43. data/ext/llrb/extconf.rb +102 -0
  44. data/ext/llrb/llrb.c +148 -0
  45. data/ext/llrb/optimizer.cc +118 -0
  46. data/ext/llrb/parser.c +191 -0
  47. data/ext/llrb/profiler.c +336 -0
  48. data/ext/llrb_insn_checkkeyword.c +20 -0
  49. data/ext/llrb_insn_checkmatch.c +28 -0
  50. data/ext/llrb_insn_concatarray.c +23 -0
  51. data/ext/llrb_insn_concatstrings.c +21 -0
  52. data/ext/llrb_insn_defined.c +9 -0
  53. data/ext/llrb_insn_getclassvariable.c +10 -0
  54. data/ext/llrb_insn_getinstancevariable.c +44 -0
  55. data/ext/llrb_insn_getlocal.c +14 -0
  56. data/ext/llrb_insn_getlocal_level0.c +8 -0
  57. data/ext/llrb_insn_getlocal_level1.c +8 -0
  58. data/ext/llrb_insn_getspecial.c +14 -0
  59. data/ext/llrb_insn_invokeblock.c +39 -0
  60. data/ext/llrb_insn_invokesuper.c +47 -0
  61. data/ext/llrb_insn_opt_aref.c +25 -0
  62. data/ext/llrb_insn_opt_aset.c +28 -0
  63. data/ext/llrb_insn_opt_div.c +32 -0
  64. data/ext/llrb_insn_opt_eq.c +57 -0
  65. data/ext/llrb_insn_opt_ge.c +28 -0
  66. data/ext/llrb_insn_opt_gt.c +38 -0
  67. data/ext/llrb_insn_opt_le.c +29 -0
  68. data/ext/llrb_insn_opt_lt.c +38 -0
  69. data/ext/llrb_insn_opt_ltlt.c +27 -0
  70. data/ext/llrb_insn_opt_minus.c +36 -0
  71. data/ext/llrb_insn_opt_mod.c +32 -0
  72. data/ext/llrb_insn_opt_mult.c +30 -0
  73. data/ext/llrb_insn_opt_neq.c +103 -0
  74. data/ext/llrb_insn_opt_plus.c +48 -0
  75. data/ext/llrb_insn_opt_send_without_block.c +45 -0
  76. data/ext/llrb_insn_opt_str_freeze.c +12 -0
  77. data/ext/llrb_insn_putspecialobject.c +23 -0
  78. data/ext/llrb_insn_send.c +49 -0
  79. data/ext/llrb_insn_setclassvariable.c +19 -0
  80. data/ext/llrb_insn_setconstant.c +23 -0
  81. data/ext/llrb_insn_setinstancevariable.c +48 -0
  82. data/ext/llrb_insn_setlocal.c +16 -0
  83. data/ext/llrb_insn_setlocal_level0.c +9 -0
  84. data/ext/llrb_insn_setlocal_level1.c +10 -0
  85. data/ext/llrb_insn_setspecial.c +15 -0
  86. data/ext/llrb_insn_splatarray.c +13 -0
  87. data/ext/llrb_insn_throw.c +11 -0
  88. data/ext/llrb_insn_trace.c +37 -0
  89. data/ext/llrb_push_result.c +14 -0
  90. data/ext/llrb_self_from_cfp.c +12 -0
  91. data/ext/llrb_set_pc.c +8 -0
  92. data/lib/llrb.rb +2 -0
  93. data/lib/llrb/jit.rb +76 -0
  94. data/lib/llrb/start.rb +2 -0
  95. data/lib/llrb/version.rb +3 -0
  96. data/llrb.gemspec +48 -0
  97. data/wercker.yml +31 -0
  98. metadata +227 -0
@@ -0,0 +1,41 @@
1
#!/usr/bin/env ruby
# Benchmark: naive recursive Fibonacci, plain Ruby method vs. the same method
# after LLRB::JIT.compile. Pass -p to call LLRB::JIT.preview on the target
# method instead of running the benchmark.

require 'bundler/setup'
require 'benchmark/ips'
require 'optparse'
require 'llrb'

preview = false
opt = OptionParser.new
opt.on('-p') { preview = true }
opt.parse!(ARGV)

# Baseline: never handed to LLRB.
ruby = Class.new
def ruby.script(n)
  if n < 3
    1
  else
    script(n-1) + script(n-2)
  end
end

# Same method body; this one is passed to LLRB::JIT below.
llrb = Class.new
def llrb.script(n)
  if n < 3
    1
  else
    script(n-1) + script(n-2)
  end
end

# In preview mode the benchmark must never run, regardless of what
# LLRB::JIT.preview returns (the former `preview(...) and return` only
# stopped when the return value was truthy).
if preview
  LLRB::JIT.preview(llrb, :script)
  exit
end

started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_microsecond)
LLRB::JIT.compile(llrb, :script)
finished_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_microsecond)
# Timestamps are in microseconds; divide by 1000 to report milliseconds.
puts "Compile Time: #{ "%.2fms" % ((finished_at - started_at) / 1000)}"

Benchmark.ips do |x|
  x.report('Ruby') { ruby.script(34) }
  x.report('LLRB') { llrb.script(34) }
  x.compare!
end
@@ -0,0 +1,33 @@
1
#!/usr/bin/env ruby
# Benchmark: calling a method that only returns a literal, plain Ruby method
# vs. the same method after LLRB::JIT.compile. Pass -p to call
# LLRB::JIT.preview on the target method instead of running the benchmark.

require 'bundler/setup'
require 'benchmark/ips'
require 'optparse'
require 'llrb'

preview = false
opt = OptionParser.new
opt.on('-p') { preview = true }
opt.parse!(ARGV)

# Baseline: never handed to LLRB.
ruby = Class.new
def ruby.script
  1
end

# Same method body; this one is passed to LLRB::JIT below.
llrb = Class.new
def llrb.script
  1
end

# In preview mode the benchmark must never run, regardless of what
# LLRB::JIT.preview returns (the former `preview(...) and return` only
# stopped when the return value was truthy).
if preview
  LLRB::JIT.preview(llrb, :script)
  exit
end

started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_microsecond)
LLRB::JIT.compile(llrb, :script)
finished_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_microsecond)
# Timestamps are in microseconds; divide by 1000 to report milliseconds.
puts "Compile Time: #{ "%.2fms" % ((finished_at - started_at) / 1000)}"

Benchmark.ips do |x|
  x.report('Ruby') { ruby.script }
  x.report('LLRB') { llrb.script }
  x.compare!
end
@@ -0,0 +1,27 @@
1
#!/usr/bin/env ruby
# Benchmark: a 30,000,000-iteration `while` loop, plain Ruby method vs. the
# same method after LLRB::JIT.compile.
require 'llrb'
require 'benchmark/ips'

# Baseline: never handed to LLRB.
ruby = Class.new
def ruby.script
  i = 0
  while i < 30_000_000
    i += 1
  end
end

# Same method body; this one is compiled by LLRB before the benchmark starts.
llrb = Class.new
def llrb.script
  i = 0
  while i < 30_000_000
    i += 1
  end
end

LLRB::JIT.compile(llrb, :script)

Benchmark.ips do |bench|
  bench.report('Ruby') { ruby.script }
  bench.report('LLRB') { llrb.script }
  bench.compare!
end
@@ -0,0 +1,33 @@
1
#!/usr/bin/env ruby
# Benchmark: a chain of integer additions, plain Ruby method vs. the same
# method after LLRB::JIT.compile. Pass -p to call LLRB::JIT.preview on the
# target method instead of running the benchmark.

require 'bundler/setup'
require 'benchmark/ips'
require 'optparse'
require 'llrb'

preview = false
opt = OptionParser.new
opt.on('-p') { preview = true }
opt.parse!(ARGV)

# Baseline: never handed to LLRB.
ruby = Class.new
def ruby.script
  1 + 2 + 3 + 4 + 5
end

# Same method body; this one is passed to LLRB::JIT below.
llrb = Class.new
def llrb.script
  1 + 2 + 3 + 4 + 5
end

# In preview mode the benchmark must never run, regardless of what
# LLRB::JIT.preview returns (the former `preview(...) and return` only
# stopped when the return value was truthy).
if preview
  LLRB::JIT.preview(llrb, :script)
  exit
end

started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_microsecond)
LLRB::JIT.compile(llrb, :script)
finished_at = Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_microsecond)
# Timestamps are in microseconds; divide by 1000 to report milliseconds.
puts "Compile Time: #{ "%.2fms" % ((finished_at - started_at) / 1000)}"

Benchmark.ips do |x|
  x.report('Ruby') { ruby.script }
  x.report('LLRB') { llrb.script }
  x.compare!
end
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Starts an IRB session with the llrb gem already loaded.

require "bundler/setup"
require "llrb"

# Fixtures and/or initialization code that make experimenting with the gem
# easier can be added here.
#
# A different console can be used as well. For Pry, add pry to the Gemfile
# and enable the following lines:
#   require "pry"
#   Pry.start

require "irb"
IRB.start(__FILE__)
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Busy loop: increments a counter from 0 to 600,000,000 and does nothing else.
i = 0
while i < 600_000_000
  i += 1
end
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Development setup script: installs the bundle.

# Stop on the first error, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -euo pipefail
# Split words on newlines and tabs only (not on spaces).
IFS=$'\n\t'
# Print each input line and each expanded command as it runs.
set -vx

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,124 @@
1
+ /*
2
+ * cfg.h: Has a Control Flow Graph struct definition shared by parser.c and compiler.c.
3
+ */
4
+
5
+ #ifndef LLRB_CFG_H
6
+ #define LLRB_CFG_H
7
+
8
+ #include <stdbool.h>
9
+ #include "llvm-c/Core.h"
10
+
11
+ struct llrb_basic_block {
12
+ // Fields set by parser:
13
+ unsigned int start; // Start index of ISeq body's iseq_encoded.
14
+ unsigned int end; // End index of ISeq body's iseq_encoded.
15
+ unsigned int incoming_size; // Size of incoming_starts.
16
+ unsigned int *incoming_starts; // Start indices of incoming basic blocks. This buffer is freed by llrb_destruct_cfg.
17
+ bool traversed; // Prevents infinite loop in `llrb_set_incoming_blocks_by` and used by compiler to judge reachable or not.
18
+
19
+ // Fields set by compiler:
20
+ LLVMBasicBlockRef ref; // LLVM's actual BasicBlock reference. This value is always available after `llrb_init_cfg_for_compile` is called.
21
+ LLVMValueRef phi; // Phi node to collect incoming values. This will be created if incoming_size > 1 and compiled time's stack size > 0.
22
+ bool compiled; // Prevents infinite loop in `llrb_compile_basic_block`.
23
+ };
24
+
25
// Control-Flow-Graph-like container. It is really just a flat buffer of graph
// nodes (basic blocks) together with the number of nodes in it.
struct llrb_cfg {
  // Buffer of basic blocks. This buffer is freed by llrb_destruct_cfg.
  struct llrb_basic_block* blocks;
  // Number of entries in `blocks`.
  unsigned int size;
};
30
+
31
+ // Used by llrb_dump_cfg.
32
+ #include "cruby.h"
33
+ #include "cruby_extra/insns.inc"
34
+ #include "cruby_extra/insns_info.inc"
35
+
36
+ // Not using `rb_iseq_original_iseq` to avoid unnecessary memory allocation.
37
+ extern int rb_vm_insn_addr2insn(const void *addr);
38
+
39
+ static void
40
+ llrb_disasm_insns(const struct rb_iseq_constant_body *body, unsigned int start, unsigned int end)
41
+ {
42
+ for (unsigned int i = start; i <= end;) {
43
+ int insn = rb_vm_insn_addr2insn((void *)body->iseq_encoded[i]);
44
+ fprintf(stderr, " %04d %-27s [%-4s] ", i, insn_name(insn), insn_op_types(insn));
45
+
46
+ for (int j = 1; j < insn_len(insn); j++) {
47
+ VALUE op = body->iseq_encoded[i+j];
48
+ switch (insn_op_type(insn, j-1)) {
49
+ case TS_NUM:
50
+ fprintf(stderr, "%-4ld ", (rb_num_t)op);
51
+ break;
52
+ case TS_OFFSET:
53
+ fprintf(stderr, "%"PRIdVALUE" ", (VALUE)(i + j + op + 1));
54
+ break;
55
+ }
56
+ }
57
+ fprintf(stderr, "\n");
58
+ i += insn_len(insn);
59
+ }
60
+ fprintf(stderr, "\n");
61
+ }
62
+
63
+ static void
64
+ llrb_dump_catch_table(const struct iseq_catch_table *ct)
65
+ {
66
+ if (!ct) return;
67
+
68
+ fprintf(stderr, "-- LLRB: catch table (size=%d)----------------\n", ct->size);
69
+ for (unsigned int i = 0; i < ct->size; i++) {
70
+ const struct iseq_catch_table_entry *entry = &ct->entries[i];
71
+
72
+ switch (entry->type) {
73
+ case CATCH_TYPE_RESCUE:
74
+ fprintf(stderr, "CATCH_TYPE_RESCUE");
75
+ break;
76
+ case CATCH_TYPE_ENSURE:
77
+ fprintf(stderr, "CATCH_TYPE_ENSURE");
78
+ break;
79
+ case CATCH_TYPE_RETRY:
80
+ fprintf(stderr, "CATCH_TYPE_RETRY");
81
+ break;
82
+ case CATCH_TYPE_BREAK:
83
+ fprintf(stderr, "CATCH_TYPE_BREAK");
84
+ break;
85
+ case CATCH_TYPE_REDO:
86
+ fprintf(stderr, "CATCH_TYPE_REDO");
87
+ break;
88
+ case CATCH_TYPE_NEXT:
89
+ fprintf(stderr, "CATCH_TYPE_NEXT");
90
+ break;
91
+ }
92
+
93
+ fprintf(stderr, ": start=%d, end=%d, cont=%d, sp=%d, iseq=%lx\n",
94
+ entry->start, entry->end, entry->cont, entry->sp, (VALUE)entry->iseq);
95
+ }
96
+ fprintf(stderr, "\n");
97
+ }
98
+
99
+ // NOTE: insns(|_info).inc has some static functions, and llrb_dump_cfg uses all those functions.
100
+ // Thus we must call this function somewhere in all included files to pass compilation. `if (0)` is placed for such a purpose.
101
+ static void
102
+ llrb_dump_cfg(const struct rb_iseq_constant_body *body, const struct llrb_cfg *cfg)
103
+ {
104
+ fprintf(stderr, "\n== LLRB: cfg ================================\n");
105
+ for (unsigned int i = 0; i < cfg->size; i++) {
106
+ struct llrb_basic_block *block = cfg->blocks + i;
107
+ fprintf(stderr, "BasicBlock[%d-%d]", block->start, block->end);
108
+
109
+ if (block->incoming_size > 0) fprintf(stderr, " <- ");
110
+ if (!block->traversed) fprintf(stderr, " UNREACHABLE");
111
+ for (unsigned int j = 0; j < block->incoming_size; j++) {
112
+ fprintf(stderr, "%d", block->incoming_starts[j]);
113
+ if (j != block->incoming_size-1) {
114
+ fprintf(stderr, ", ");
115
+ }
116
+ }
117
+
118
+ fprintf(stderr, "\n");
119
+ llrb_disasm_insns(body, block->start, block->end);
120
+ }
121
+ llrb_dump_catch_table(body->catch_table);
122
+ }
123
+
124
+ #endif // LLRB_CFG_H
@@ -0,0 +1,987 @@
1
+ /*
2
+ * compiler.c: Compiles encoded YARV instructions structured as Control Flow Graph to LLVM IR.
3
+ */
4
+
5
+ #include <stdarg.h>
6
+ #include <stdbool.h>
7
+ #include <string.h>
8
+ #include "llvm-c/BitReader.h"
9
+ #include "llvm-c/Core.h"
10
+ #include "cfg.h"
11
+ #include "cruby.h"
12
+
13
+ static VALUE rb_eCompileError;
14
+ #include "compiler/funcs.h"
15
+ #include "compiler/stack.h"
16
+
17
+ // Store compiler's internal state and shared variables
18
+ struct llrb_compiler {
19
+ const struct rb_iseq_constant_body *body;
20
+ const VALUE *new_iseq_encoded; // program counter's base address after this compilation.
21
+ struct llrb_cfg *cfg;
22
+ LLVMValueRef func;
23
+ LLVMBuilderRef builder;
24
+ LLVMModuleRef mod;
25
+ };
26
+
27
+ static inline LLVMValueRef
28
+ llrb_value(VALUE value)
29
+ {
30
+ return LLVMConstInt(LLVMInt64Type(), value, false); // TODO: support 32bit for VALUE type
31
+ }
32
+
33
+ static inline LLVMValueRef
34
+ llrb_get_thread(const struct llrb_compiler *c)
35
+ {
36
+ return LLVMGetParam(c->func, 0);
37
+ }
38
+
39
+ static inline LLVMValueRef
40
+ llrb_get_cfp(const struct llrb_compiler *c)
41
+ {
42
+ return LLVMGetParam(c->func, 1);
43
+ }
44
+
45
+ // In base 2, RTEST is: (v != Qfalse && v != Qnil) -> (v != 0000 && v != 1000) -> (v & 0111) != 0000 -> (v & ~Qnil) != 0
46
+ static LLVMValueRef
47
+ llrb_build_rtest(LLVMBuilderRef builder, LLVMValueRef value)
48
+ {
49
+ LLVMValueRef masked = LLVMBuildAnd(builder, value, llrb_value(~Qnil), "RTEST_mask");
50
+ return LLVMBuildICmp(builder, LLVMIntNE, masked, llrb_value(0), "RTEST");
51
+ }
52
+
53
+ static LLVMValueRef
54
+ llrb_call_func(const struct llrb_compiler *c, const char *funcname, unsigned argc, ...)
55
+ {
56
+ LLVMValueRef *args = ALLOC_N(LLVMValueRef, argc); // `xfree`d in the end of this function.
57
+
58
+ va_list ar;
59
+ va_start(ar, argc);
60
+ for (unsigned i = 0; i < argc; i++) {
61
+ args[i] = va_arg(ar, LLVMValueRef);
62
+ }
63
+ va_end(ar);
64
+
65
+ LLVMValueRef ret = LLVMBuildCall(c->builder, llrb_get_function(c->mod, funcname), args, argc, "");
66
+ xfree(args);
67
+ return ret;
68
+ }
69
+
70
+ static inline LLVMValueRef
71
+ llrb_get_self(const struct llrb_compiler *c)
72
+ {
73
+ return llrb_call_func(c, "llrb_self_from_cfp", 1, llrb_get_cfp(c));
74
+ }
75
+
76
+ static LLVMValueRef
77
+ llrb_compile_funcall(const struct llrb_compiler *c, struct llrb_stack *stack, ID mid, int argc)
78
+ {
79
+ LLVMValueRef func = llrb_get_function(c->mod, "rb_funcall");
80
+ LLVMValueRef *args = ALLOC_N(LLVMValueRef, 3+argc); // 3 is recv, mid, n
81
+
82
+ for (int i = argc-1; 0 <= i; i--) {
83
+ args[3+i] = llrb_stack_pop(stack); // 3 is recv, mid, n
84
+ }
85
+ args[0] = llrb_stack_pop(stack);
86
+ args[1] = llrb_value(mid);
87
+ args[2] = LLVMConstInt(LLVMInt32Type(), argc, false);
88
+
89
+ return LLVMBuildCall(c->builder, func, args, 3+argc, "rb_funcall");
90
+ }
91
+
92
+ // Must call `llrb_destruct_stack` after usage of return value.
93
+ // TODO: Using `memcpy` would be faster.
94
+ static struct llrb_stack *
95
+ llrb_copy_stack(const struct llrb_stack *stack)
96
+ {
97
+ struct llrb_stack *ret = ALLOC_N(struct llrb_stack, 1); // `xfree`d by `llrb_destruct_stack`.
98
+ ret->size = stack->size;
99
+ ret->max = stack->max;
100
+ ret->body = ALLOC_N(LLVMValueRef, ret->max); // `xfree`d by `llrb_destruct_stack`.
101
+ for (unsigned int i = 0; i < stack->size; i++) {
102
+ ret->body[i] = stack->body[i];
103
+ }
104
+ return ret;
105
+ }
106
+
107
+ static void
108
+ llrb_destruct_stack(struct llrb_stack *stack)
109
+ {
110
+ xfree(stack->body);
111
+ xfree(stack);
112
+ }
113
+
114
+ static struct llrb_basic_block *
115
+ llrb_find_block(struct llrb_cfg *cfg, unsigned int start)
116
+ {
117
+ for (unsigned int i = 0; i < cfg->size; i++) {
118
+ struct llrb_basic_block *block = cfg->blocks + i;
119
+ if (block->start == start) return block;
120
+ }
121
+ rb_raise(rb_eCompileError, "BasicBlock (start = %d) was not found in llrb_find_block", start);
122
+ }
123
+
124
+ static void
125
+ llrb_push_incoming_things(const struct llrb_compiler *c, struct llrb_basic_block *block, LLVMBasicBlockRef current_ref, LLVMValueRef incoming_value)
126
+ {
127
+ // Ensure phi node existence here.
128
+ if (block->phi == 0) {
129
+ // Phi node should be created before its basic block's compilation...
130
+ if (block->compiled) {
131
+ rb_raise(rb_eCompileError, "Already compiled BasicBlock (start = %d) didn't have phi and was requested to push incoming value", block->start);
132
+ }
133
+ LLVMPositionBuilderAtEnd(c->builder, block->ref);
134
+ block->phi = LLVMBuildPhi(c->builder, LLVMInt64Type(), ""); // TODO: Support 32bit
135
+ LLVMPositionBuilderAtEnd(c->builder, current_ref);
136
+ }
137
+
138
+ LLVMValueRef values[] = { incoming_value };
139
+ LLVMBasicBlockRef blocks[] = { current_ref };
140
+ LLVMAddIncoming(block->phi, values, blocks, 1);
141
+ }
142
+
143
+ static LLVMValueRef
144
+ llrb_compile_newarray(const struct llrb_compiler *c, struct llrb_stack *stack, long num)
145
+ {
146
+ LLVMValueRef *args = ALLOC_N(LLVMValueRef, num+1); // `xfree`d in the end of this function.
147
+ args[0] = LLVMConstInt(LLVMInt64Type(), num, true); // TODO: support 32bit
148
+ for (long i = num; 1 <= i; i--) {
149
+ args[i] = llrb_stack_pop(stack);
150
+ }
151
+
152
+ LLVMValueRef func = llrb_get_function(c->mod, "rb_ary_new_from_args");
153
+ LLVMValueRef ret = LLVMBuildCall(c->builder, func, args, num+1, "newarray");
154
+ xfree(args);
155
+ return ret;
156
+ }
157
+
158
+ // If insn can call any method, it is throwable and needs to change program counter. Or it may rb_raise.
159
+ static bool
160
+ llrb_pc_change_required(const int insn)
161
+ {
162
+ switch (insn) {
163
+ case YARVINSN_tostring:
164
+ case YARVINSN_freezestring:
165
+ case YARVINSN_checkmatch:
166
+ case YARVINSN_send:
167
+ case YARVINSN_opt_str_freeze:
168
+ case YARVINSN_opt_newarray_max:
169
+ case YARVINSN_opt_newarray_min:
170
+ case YARVINSN_opt_send_without_block:
171
+ case YARVINSN_invokesuper:
172
+ case YARVINSN_invokeblock:
173
+ case YARVINSN_leave:
174
+ case YARVINSN_throw:
175
+ case YARVINSN_opt_plus:
176
+ case YARVINSN_opt_minus:
177
+ case YARVINSN_opt_mult:
178
+ case YARVINSN_opt_div:
179
+ case YARVINSN_opt_mod:
180
+ case YARVINSN_opt_eq:
181
+ case YARVINSN_opt_neq:
182
+ case YARVINSN_opt_lt:
183
+ case YARVINSN_opt_le:
184
+ case YARVINSN_opt_gt:
185
+ case YARVINSN_opt_ge:
186
+ case YARVINSN_opt_ltlt:
187
+ case YARVINSN_opt_aref:
188
+ case YARVINSN_opt_aset:
189
+ case YARVINSN_opt_aset_with:
190
+ case YARVINSN_opt_aref_with:
191
+ case YARVINSN_opt_length:
192
+ case YARVINSN_opt_size:
193
+ case YARVINSN_opt_empty_p:
194
+ case YARVINSN_opt_succ:
195
+ case YARVINSN_opt_not:
196
+ case YARVINSN_opt_regexpmatch1:
197
+ case YARVINSN_opt_regexpmatch2:
198
+ case YARVINSN_opt_call_c_function:
199
+ return true;
200
+ default:
201
+ return false;
202
+ }
203
+ }
204
+
205
+ // Catch table checks program counter to decide catch it or not. So we need to set program counter before method call or throw insn.
206
+ static void
207
+ llrb_increment_pc(const struct llrb_compiler *c, const unsigned int pos, const int insn)
208
+ {
209
+ if (pos == 0) return; // Skip. 0 would be opt_call_c_function and there's no need to change.
210
+
211
+ if (llrb_pc_change_required(insn)) {
212
+ // This case should be rejected to compile by `llrb_check_not_compilable`.
213
+ if (pos == 1) rb_raise(rb_eCompileError, "program counter is set to 1 from iseq_encoded");
214
+
215
+ // `pos` MUST NOT be 0 (it causes stack level too deep) or 1 (funcptr is considered as insn and it's invalid as insn).
216
+ const VALUE *pc = c->new_iseq_encoded + pos; // This must be `new_iseq_encoded` to get proper `epc` in `vm_exec`.
217
+ llrb_call_func(c, "llrb_set_pc", 2, llrb_get_cfp(c), llrb_value((VALUE)pc));
218
+ }
219
+ }
220
+
221
+ static void
222
+ llrb_compile_opt_insn(const struct llrb_compiler *c, struct llrb_stack *stack, const char *name, int argc)
223
+ {
224
+ LLVMValueRef *args = ALLOC_N(LLVMValueRef, argc); // `xfree`d in this function.
225
+ for (int i = argc-1; i >= 0; i--) {
226
+ args[i] = llrb_stack_pop(stack);
227
+ }
228
+
229
+ char *funcname = ZALLOC_N(char, strlen("llrb_insn_") + strlen(name) + 1); // `xfree`d in this function.
230
+ strcat(funcname, "llrb_insn_");
231
+ strcat(funcname, name);
232
+ llrb_stack_push(stack, LLVMBuildCall(c->builder, llrb_get_function(c->mod, funcname), args, argc, name));
233
+
234
+ xfree(funcname);
235
+ xfree(args);
236
+ }
237
+
238
+ static void llrb_compile_basic_block(const struct llrb_compiler *c, struct llrb_basic_block *block, struct llrb_stack *stack);
239
+
240
+ // opt TODO:
241
+ // YARVINSN_opt_newarray_max:
242
+ // YARVINSN_opt_newarray_min:
243
+ // YARVINSN_opt_aset:
244
+ // YARVINSN_opt_aset_with:
245
+ // YARVINSN_opt_aref_with:
246
+ // YARVINSN_opt_length:
247
+ // YARVINSN_opt_size:
248
+ // YARVINSN_opt_empty_p:
249
+ // YARVINSN_opt_succ:
250
+ // YARVINSN_opt_not:
251
+ // YARVINSN_opt_regexpmatch1:
252
+ // YARVINSN_opt_regexpmatch2:
253
+
254
+ // @param created_br is set true if conditional branch is created. In that case, br for next block isn't created in `llrb_compile_basic_block`.
255
+ // @return true if the IR compiled from given insn includes `ret` instruction. In that case, next block won't be compiled in `llrb_compile_basic_block`.
256
+ static bool
257
+ llrb_compile_insn(const struct llrb_compiler *c, struct llrb_stack *stack, const unsigned int pos, const int insn, const VALUE *operands, bool *created_br)
258
+ {
259
+ llrb_increment_pc(c, pos, insn);
260
+
261
+ //fprintf(stderr, " [DEBUG] llrb_compile_insn: %04d before %-27s (stack size: %d)\n", pos, insn_name(insn), stack->size);
262
+ *created_br = false;
263
+ switch (insn) {
264
+ case YARVINSN_nop:
265
+ break; // nop
266
+ case YARVINSN_getlocal: {
267
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_getlocal", 3, llrb_get_cfp(c),
268
+ llrb_value((lindex_t)operands[0]), llrb_value((rb_num_t)operands[1])));
269
+ break;
270
+ }
271
+ case YARVINSN_setlocal: {
272
+ llrb_call_func(c, "llrb_insn_setlocal", 4, llrb_get_cfp(c),
273
+ llrb_value((lindex_t)operands[0]), llrb_value((rb_num_t)operands[1]), llrb_stack_pop(stack));
274
+ break;
275
+ }
276
+ case YARVINSN_getspecial: {
277
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_getspecial", 2, llrb_value(operands[0]), llrb_value(operands[1])));
278
+ break;
279
+ }
280
+ case YARVINSN_setspecial: {
281
+ llrb_call_func(c, "llrb_insn_setspecial", 2, llrb_value(operands[0]), llrb_stack_pop(stack));
282
+ break;
283
+ }
284
+ case YARVINSN_getinstancevariable:
285
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_getinstancevariable", 3,
286
+ llrb_get_self(c), llrb_value(operands[0]), llrb_value(operands[1])));
287
+ break;
288
+ case YARVINSN_setinstancevariable:
289
+ llrb_call_func(c, "llrb_insn_setinstancevariable", 4, llrb_get_self(c),
290
+ llrb_value(operands[0]), llrb_stack_pop(stack), llrb_value(operands[1]));
291
+ break;
292
+ case YARVINSN_getclassvariable: {
293
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_getclassvariable", 2, llrb_get_cfp(c), llrb_value(operands[0])));
294
+ break;
295
+ }
296
+ case YARVINSN_setclassvariable: {
297
+ llrb_call_func(c, "llrb_insn_setclassvariable", 3, llrb_get_cfp(c), llrb_value(operands[0]), llrb_stack_pop(stack));
298
+ break;
299
+ }
300
+ case YARVINSN_getconstant: {
301
+ llrb_stack_push(stack, llrb_call_func(c, "vm_get_ev_const", 4, llrb_get_thread(c),
302
+ llrb_stack_pop(stack), llrb_value(operands[0]), LLVMConstInt(LLVMInt32Type(), 0, true)));
303
+ break;
304
+ }
305
+ case YARVINSN_setconstant: {
306
+ LLVMValueRef cbase = llrb_stack_pop(stack);
307
+ LLVMValueRef args[] = { llrb_get_self(c), cbase, llrb_value(operands[0]), llrb_stack_pop(stack) };
308
+ LLVMBuildCall(c->builder, llrb_get_function(c->mod, "llrb_insn_setconstant"), args, 4, "");
309
+ break;
310
+ }
311
+ case YARVINSN_getglobal:
312
+ llrb_stack_push(stack, llrb_call_func(c, "rb_gvar_get", 1, llrb_value(operands[0])));
313
+ break;
314
+ case YARVINSN_setglobal:
315
+ llrb_call_func(c, "rb_gvar_set", 2, llrb_value(operands[0]), llrb_stack_pop(stack));
316
+ break;
317
+ case YARVINSN_putnil:
318
+ llrb_stack_push(stack, llrb_value(Qnil));
319
+ break;
320
+ case YARVINSN_putself: {
321
+ llrb_stack_push(stack, llrb_get_self(c));
322
+ break;
323
+ }
324
+ case YARVINSN_putobject:
325
+ llrb_stack_push(stack, llrb_value(operands[0]));
326
+ break;
327
+ case YARVINSN_putspecialobject: {
328
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_putspecialobject", 1, llrb_value(operands[0])));
329
+ break;
330
+ }
331
+ case YARVINSN_putiseq:
332
+ llrb_stack_push(stack, llrb_value(operands[0]));
333
+ break;
334
+ case YARVINSN_putstring: {
335
+ llrb_stack_push(stack, llrb_call_func(c, "rb_str_resurrect", 1, llrb_value(operands[0])));
336
+ break;
337
+ }
338
+ case YARVINSN_concatstrings: {
339
+ LLVMValueRef *args = ALLOC_N(LLVMValueRef, operands[0] + 1); // `xfree`d in this block.
340
+ args[0] = llrb_value(operands[0]); // function is in size_t. correct?
341
+ for (long i = (long)operands[0]-1; 0 <= i; i--) {
342
+ args[1+i] = llrb_stack_pop(stack);
343
+ }
344
+ llrb_stack_push(stack, LLVMBuildCall(c->builder, llrb_get_function(c->mod, "llrb_insn_concatstrings"), args, operands[0] + 1, "concatstrings"));
345
+ xfree(args);
346
+ break;
347
+ }
348
+ case YARVINSN_tostring: {
349
+ llrb_stack_push(stack, llrb_call_func(c, "rb_obj_as_string", 1, llrb_stack_pop(stack)));
350
+ break;
351
+ }
352
+ case YARVINSN_freezestring: {
353
+ VALUE debug_info = operands[0];
354
+ LLVMValueRef str = llrb_stack_pop(stack);
355
+ if (!NIL_P(debug_info)) {
356
+ llrb_call_func(c, "rb_ivar_set", 3, str, llrb_value(id_debug_created_info), llrb_value(debug_info));
357
+ }
358
+
359
+ llrb_call_func(c, "rb_str_freeze", 1, str); // TODO: inline
360
+ llrb_stack_push(stack, str);
361
+ break;
362
+ }
363
+ case YARVINSN_toregexp: {
364
+ rb_num_t cnt = operands[1];
365
+ LLVMValueRef *args1 = ALLOC_N(LLVMValueRef, cnt+1); // `xfree`d in this block.
366
+ args1[0] = LLVMConstInt(LLVMInt64Type(), (long)cnt, true);
367
+ for (rb_num_t i = 0; i < cnt; i++) {
368
+ args1[1+i] = llrb_stack_pop(stack);
369
+ }
370
+ LLVMValueRef ary = LLVMBuildCall(c->builder, llrb_get_function(c->mod, "rb_ary_new_from_args"), args1, 1+cnt, "toregexp");
371
+ xfree(args1);
372
+
373
+ llrb_stack_push(stack, llrb_call_func(c, "rb_reg_new_ary", 2, ary, LLVMConstInt(LLVMInt32Type(), (int)operands[0], true)));
374
+
375
+ llrb_call_func(c, "rb_ary_clear", 1, ary);
376
+ break;
377
+ }
378
+ case YARVINSN_newarray:
379
+ llrb_stack_push(stack, llrb_compile_newarray(c, stack, (long)operands[0]));
380
+ break;
381
+ case YARVINSN_duparray: {
382
+ llrb_stack_push(stack, llrb_call_func(c, "rb_ary_resurrect", 1, llrb_value(operands[0]))); // TODO: inline rb_ary_resurrect?
383
+ break;
384
+ }
385
+ //case YARVINSN_expandarray: {
386
+ // rb_num_t flag = (rb_num_t)operands[1];
387
+ // if (flag & 0x02) { // for postarg
388
+ // } else {
389
+ // }
390
+ // break;
391
+ //}
392
+ case YARVINSN_concatarray: {
393
+ LLVMValueRef ary2st = llrb_stack_pop(stack);
394
+ LLVMValueRef ary1 = llrb_stack_pop(stack);
395
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_concatarray", 2, ary1, ary2st));
396
+ break;
397
+ }
398
+ case YARVINSN_splatarray: {
399
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_splatarray", 2, llrb_stack_pop(stack), llrb_value(operands[0])));
400
+ break;
401
+ }
402
+ case YARVINSN_newhash: {
403
+ LLVMValueRef *values = ALLOC_N(LLVMValueRef, operands[0] / 2);
404
+ LLVMValueRef *keys = ALLOC_N(LLVMValueRef, operands[0] / 2);
405
+ for (int i = 0; i < (int)operands[0] / 2; i++) {
406
+ values[i] = llrb_stack_pop(stack);
407
+ keys[i] = llrb_stack_pop(stack);
408
+ }
409
+
410
+ LLVMValueRef result = LLVMBuildCall(c->builder, llrb_get_function(c->mod, "rb_hash_new"), 0, 0, "newhash");
411
+ // reverse set
412
+ for (int i = (int)operands[0] / 2 - 1; 0 <= i; i--) {
413
+ LLVMValueRef args[] = { result, keys[i], values[i] };
414
+ LLVMBuildCall(c->builder, llrb_get_function(c->mod, "rb_hash_aset"), args, 3, "newhash_aset");
415
+ }
416
+ llrb_stack_push(stack, result);
417
+ break;
418
+ }
419
+ case YARVINSN_newrange: {
420
+ LLVMValueRef high = llrb_stack_pop(stack);
421
+ LLVMValueRef low = llrb_stack_pop(stack);
422
+ LLVMValueRef flag = LLVMConstInt(LLVMInt64Type(), operands[0], false);
423
+ llrb_stack_push(stack, llrb_call_func(c, "rb_range_new", 3, low, high, flag));
424
+ break;
425
+ }
426
+ case YARVINSN_pop:
427
+ llrb_stack_pop(stack);
428
+ break;
429
+ case YARVINSN_dup: {
430
+ LLVMValueRef value = llrb_stack_pop(stack);
431
+ llrb_stack_push(stack, value);
432
+ llrb_stack_push(stack, value);
433
+ break;
434
+ }
435
+ case YARVINSN_dupn: {
436
+ LLVMValueRef *values = ALLOC_N(LLVMValueRef, operands[0]); // `xfree`d in this block.
437
+ for (rb_num_t i = 0; i < (rb_num_t)operands[0]; i++) {
438
+ values[i] = llrb_stack_pop(stack); // TODO: obviously no need to pop
439
+ }
440
+
441
+ for (rb_num_t i = 0; i < (rb_num_t)operands[0]; i++) {
442
+ llrb_stack_push(stack, values[operands[0] - 1 - i]);
443
+ }
444
+ for (rb_num_t i = 0; i < (rb_num_t)operands[0]; i++) {
445
+ llrb_stack_push(stack, values[operands[0] - 1 - i]);
446
+ }
447
+ xfree(values);
448
+ break;
449
+ }
450
+ case YARVINSN_swap: {
451
+ LLVMValueRef first = llrb_stack_pop(stack);
452
+ LLVMValueRef second = llrb_stack_pop(stack);
453
+ llrb_stack_push(stack, first);
454
+ llrb_stack_push(stack, second);
455
+ break;
456
+ }
457
+ //case YARVINSN_reverse: {
458
+ // rb_num_t n = (rb_num_t)operands[0];
459
+ // unsigned int last = stack->size - 1;
460
+ // unsigned int top_i = last - n;
461
+
462
+ // for (rb_num_t i = 0; i < n/2; i++) {
463
+ // LLVMValueRef v0 = stack->body[top_i+i];
464
+ // LLVMValueRef v1 = stack->body[last-i];
465
+ // stack->body[top_i+i] = v1;
466
+ // stack->body[last-i] = v0;
467
+ // }
468
+ // break;
469
+ //}
470
+ //case YARVINSN_reput:
471
+ // break; // none
472
+ case YARVINSN_topn: {
473
+ llrb_stack_push(stack, llrb_stack_topn(stack, (unsigned int)operands[0]));
474
+ break;
475
+ }
476
+ case YARVINSN_setn: {
477
+ rb_num_t last = (rb_num_t)stack->size - 1;
478
+ stack->body[last - (rb_num_t)operands[0]] = stack->body[last];
479
+ break;
480
+ }
481
+ case YARVINSN_adjuststack: {
482
+ for (rb_num_t i = 0; i < (rb_num_t)operands[0]; i++) {
483
+ llrb_stack_pop(stack);
484
+ }
485
+ break;
486
+ }
487
+ case YARVINSN_defined: {
488
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_defined", 4, llrb_value(operands[0]),
489
+ llrb_value(operands[1]), llrb_value(operands[2]), llrb_stack_pop(stack)));
490
+ break;
491
+ }
492
+ case YARVINSN_checkmatch: {
493
+ LLVMValueRef pattern = llrb_stack_pop(stack);
494
+ LLVMValueRef target =llrb_stack_pop(stack);
495
+ LLVMValueRef flag = LLVMConstInt(LLVMInt64Type(), operands[0], false);
496
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_checkmatch", 3, target, pattern, flag));
497
+ break;
498
+ }
499
+ case YARVINSN_checkkeyword: {
500
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_checkkeyword", 3, llrb_get_cfp(c),
501
+ llrb_value((lindex_t)operands[0]), llrb_value((rb_num_t)operands[1])));
502
+ break;
503
+ }
504
+ case YARVINSN_trace: {
505
+ rb_event_flag_t flag = (rb_event_flag_t)((rb_num_t)operands[0]);
506
+ LLVMValueRef val = (flag & (RUBY_EVENT_RETURN | RUBY_EVENT_B_RETURN)) ? stack->body[stack->size-1] : llrb_value(Qundef);
507
+ llrb_call_func(c, "llrb_insn_trace", 4, llrb_get_thread(c), llrb_get_cfp(c), LLVMConstInt(LLVMInt32Type(), flag, false), val);
508
+ break;
509
+ }
510
+ //case YARVINSN_defineclass: {
511
+ // ;
512
+ // break;
513
+ //}
514
+ case YARVINSN_send: {
515
+ CALL_INFO ci = (CALL_INFO)operands[0];
516
+ unsigned int stack_size = ci->orig_argc + 1;
517
+ if (ci->flag & VM_CALL_ARGS_BLOCKARG) stack_size++; // push `&block`
518
+
519
+ unsigned int arg_size = 6 + stack_size;
520
+ LLVMValueRef *args = ALLOC_N(LLVMValueRef, arg_size);
521
+ args[0] = llrb_get_thread(c);
522
+ args[1] = llrb_get_cfp(c);
523
+ args[2] = llrb_value((VALUE)ci);
524
+ args[3] = llrb_value((VALUE)((CALL_CACHE)operands[1]));
525
+ args[4] = llrb_value((VALUE)((ISEQ)operands[2]));
526
+ args[5] = LLVMConstInt(LLVMInt32Type(), stack_size, false);
527
+ for (int i = (int)stack_size - 1; 0 <= i; i--) { // recv + argc
528
+ args[6 + i] = llrb_stack_pop(stack);
529
+ }
530
+
531
+ llrb_stack_push(stack, LLVMBuildCall(c->builder, llrb_get_function(c->mod, "llrb_insn_send"), args, arg_size, "send"));
532
+ break;
533
+ }
534
+ case YARVINSN_opt_str_freeze: { // TODO: optimize
535
+ llrb_stack_push(stack, llrb_value(operands[0]));
536
+ llrb_compile_opt_insn(c, stack, "opt_str_freeze", 1);
537
+ break;
538
+ }
539
+ case YARVINSN_opt_newarray_max: // TODO: optimize
540
+ llrb_stack_push(stack, llrb_compile_newarray(c, stack, (long)operands[0]));
541
+ llrb_stack_push(stack, llrb_compile_funcall(c, stack, rb_intern("max"), 0));
542
+ break;
543
+ case YARVINSN_opt_newarray_min: // TODO: optimize
544
+ llrb_stack_push(stack, llrb_compile_newarray(c, stack, (long)operands[0]));
545
+ llrb_stack_push(stack, llrb_compile_funcall(c, stack, rb_intern("min"), 0));
546
+ break;
547
+ case YARVINSN_opt_send_without_block: {
548
+ CALL_INFO ci = (CALL_INFO)operands[0];
549
+ unsigned int stack_size = ci->orig_argc + 1;
550
+
551
+ LLVMValueRef *args = ALLOC_N(LLVMValueRef, 5 + stack_size); // `xfree`d in this block.
552
+ args[0] = llrb_get_thread(c);
553
+ args[1] = llrb_get_cfp(c);
554
+ args[2] = llrb_value((VALUE)ci);
555
+ args[3] = llrb_value((VALUE)((CALL_CACHE)operands[1]));
556
+ args[4] = LLVMConstInt(LLVMInt32Type(), stack_size, false);
557
+ for (int i = (int)stack_size - 1; 0 <= i; i--) { // recv + argc
558
+ args[5 + i] = llrb_stack_pop(stack);
559
+ }
560
+
561
+ LLVMValueRef func = llrb_get_function(c->mod, "llrb_insn_opt_send_without_block");
562
+ llrb_stack_push(stack, LLVMBuildCall(c->builder, func, args, 5 + stack_size, "opt_send_without_block"));
563
+ xfree(args);
564
+ break;
565
+ }
566
+ case YARVINSN_invokesuper: { // TODO: refactor with opt_send_without_block
567
+ CALL_INFO ci = (CALL_INFO)operands[0];
568
+ unsigned int stack_size = ci->orig_argc + 1;
569
+
570
+ LLVMValueRef *args = ALLOC_N(LLVMValueRef, 5 + stack_size);
571
+ args[0] = llrb_get_thread(c);
572
+ args[1] = llrb_get_cfp(c);
573
+ args[2] = llrb_value((VALUE)ci);
574
+ args[3] = llrb_value((VALUE)((CALL_CACHE)operands[1]));
575
+ args[4] = llrb_value((VALUE)((ISEQ)operands[2]));
576
+ args[5] = LLVMConstInt(LLVMInt32Type(), stack_size, false);
577
+ for (int i = (int)stack_size - 1; 0 <= i; i--) { // recv + argc
578
+ args[6 + i] = llrb_stack_pop(stack);
579
+ }
580
+ llrb_stack_push(stack, LLVMBuildCall(c->builder, llrb_get_function(c->mod, "llrb_insn_invokesuper"), args, 6 + stack_size, "invokesuper"));
581
+ break;
582
+ }
583
+ case YARVINSN_invokeblock: {
584
+ CALL_INFO ci = (CALL_INFO)operands[0];
585
+ unsigned int stack_size = ci->orig_argc;
586
+
587
+ LLVMValueRef *args = ALLOC_N(LLVMValueRef, 4 + stack_size);
588
+ args[0] = llrb_get_thread(c);
589
+ args[1] = llrb_get_cfp(c);
590
+ args[2] = llrb_value((VALUE)ci);
591
+ args[3] = LLVMConstInt(LLVMInt32Type(), stack_size, false);
592
+ for (int i = (int)stack_size - 1; 0 <= i; i--) { // recv + argc
593
+ args[4 + i] = llrb_stack_pop(stack);
594
+ }
595
+ llrb_stack_push(stack, LLVMBuildCall(c->builder, llrb_get_function(c->mod, "llrb_insn_invokeblock"), args, 4 + stack_size, "invokeblock"));
596
+ break;
597
+ }
598
+ case YARVINSN_leave:
599
+ if (stack->size != 1) {
600
+ llrb_dump_cfg(c->body, c->cfg);
601
+ rb_raise(rb_eCompileError, "unexpected stack size at leave: %d", stack->size);
602
+ }
603
+
604
+ llrb_call_func(c, "llrb_push_result", 2, llrb_get_cfp(c), llrb_stack_pop(stack));
605
+ LLVMBuildRet(c->builder, llrb_get_cfp(c));
606
+ return true;
607
+ case YARVINSN_throw: {
608
+ llrb_call_func(c, "llrb_insn_throw", 4, llrb_get_thread(c), llrb_get_cfp(c),
609
+ llrb_value((rb_num_t)operands[0]), llrb_stack_pop(stack));
610
+
611
+ // In opt_call_c_function, if we return 0, we can throw the error from th->errinfo.
612
+ // https://github.com/ruby/ruby/blob/v2_4_1/insns.def#L2147-L2151
613
+ LLVMBuildRet(c->builder, llrb_value(0));
614
+ return true;
615
+ }
616
+ case YARVINSN_jump: {
617
+ unsigned dest = pos + (unsigned)insn_len(insn) + operands[0];
618
+ struct llrb_basic_block *next_block = llrb_find_block(c->cfg, dest);
619
+
620
+ LLVMBuildBr(c->builder, next_block->ref);
621
+ *created_br = true;
622
+
623
+ if (next_block->incoming_size > 1 && stack->size > 0) {
624
+ llrb_push_incoming_things(c, next_block, LLVMGetInsertBlock(c->builder), llrb_stack_pop(stack));
625
+ }
626
+ llrb_compile_basic_block(c, next_block, stack);
627
+ return true;
628
+ }
629
+ case YARVINSN_branchif: { // TODO: refactor with other branch insns
630
+ unsigned branch_dest = pos + (unsigned)insn_len(insn) + operands[0];
631
+ unsigned fallthrough = pos + (unsigned)insn_len(insn);
632
+ struct llrb_basic_block *branch_dest_block = llrb_find_block(c->cfg, branch_dest);
633
+ struct llrb_basic_block *fallthrough_block = llrb_find_block(c->cfg, fallthrough);
634
+
635
+ LLVMValueRef cond = llrb_stack_pop(stack);
636
+ LLVMBuildCondBr(c->builder, llrb_build_rtest(c->builder, cond), branch_dest_block->ref, fallthrough_block->ref);
637
+ *created_br = true;
638
+
639
+ struct llrb_stack *branch_dest_stack = llrb_copy_stack(stack); // `llrb_destruct_stack`ed in this block.
640
+ if (branch_dest_block->incoming_size > 1 && branch_dest_stack->size > 0) {
641
+ llrb_push_incoming_things(c, branch_dest_block,
642
+ LLVMGetInsertBlock(c->builder), llrb_stack_pop(branch_dest_stack));
643
+ }
644
+ llrb_compile_basic_block(c, branch_dest_block, branch_dest_stack);
645
+ llrb_destruct_stack(branch_dest_stack);
646
+ break; // caller `compile_basic_block` compiles fallthrough_block and pushes incoming things to its phi node.
647
+ }
648
+ case YARVINSN_branchunless: { // TODO: refactor with other branch insns
649
+ unsigned branch_dest = pos + (unsigned)insn_len(insn) + operands[0];
650
+ unsigned fallthrough = pos + (unsigned)insn_len(insn);
651
+ struct llrb_basic_block *branch_dest_block = llrb_find_block(c->cfg, branch_dest);
652
+ struct llrb_basic_block *fallthrough_block = llrb_find_block(c->cfg, fallthrough);
653
+
654
+ LLVMValueRef cond = llrb_stack_pop(stack);
655
+ LLVMBuildCondBr(c->builder, llrb_build_rtest(c->builder, cond), fallthrough_block->ref, branch_dest_block->ref);
656
+ *created_br = true;
657
+
658
+ struct llrb_stack *branch_dest_stack = llrb_copy_stack(stack); // `llrb_destruct_stack`ed in this block.
659
+ if (branch_dest_block->incoming_size > 1 && branch_dest_stack->size > 0) {
660
+ llrb_push_incoming_things(c, branch_dest_block,
661
+ LLVMGetInsertBlock(c->builder), llrb_stack_pop(branch_dest_stack));
662
+ }
663
+ llrb_compile_basic_block(c, branch_dest_block, branch_dest_stack);
664
+ llrb_destruct_stack(branch_dest_stack);
665
+ break; // caller `compile_basic_block` compiles fallthrough_block and pushes incoming things to its phi node.
666
+ }
667
+ case YARVINSN_branchnil: { // TODO: refactor with other branch insns
668
+ unsigned branch_dest = pos + (unsigned)insn_len(insn) + operands[0];
669
+ unsigned fallthrough = pos + (unsigned)insn_len(insn);
670
+ struct llrb_basic_block *branch_dest_block = llrb_find_block(c->cfg, branch_dest);
671
+ struct llrb_basic_block *fallthrough_block = llrb_find_block(c->cfg, fallthrough);
672
+
673
+ LLVMValueRef cond = llrb_stack_pop(stack);
674
+ LLVMBuildCondBr(c->builder,
675
+ LLVMBuildICmp(c->builder, LLVMIntNE, cond, llrb_value(Qnil), "NIL_P"),
676
+ fallthrough_block->ref, branch_dest_block->ref);
677
+ *created_br = true;
678
+
679
+ struct llrb_stack *branch_dest_stack = llrb_copy_stack(stack); // `llrb_destruct_stack`ed in this block.
680
+ if (branch_dest_block->incoming_size > 1 && branch_dest_stack->size > 0) {
681
+ llrb_push_incoming_things(c, branch_dest_block,
682
+ LLVMGetInsertBlock(c->builder), llrb_stack_pop(branch_dest_stack));
683
+ }
684
+ llrb_compile_basic_block(c, branch_dest_block, branch_dest_stack);
685
+ llrb_destruct_stack(branch_dest_stack);
686
+ break; // caller `compile_basic_block` compiles fallthrough_block and pushes incoming things to its phi node.
687
+ }
688
+ case YARVINSN_getinlinecache:
689
+ llrb_stack_push(stack, llrb_value(Qnil)); // TODO: implement
690
+ break;
691
+ case YARVINSN_setinlinecache:
692
+ break; // TODO: implement
693
+ //case YARVINSN_once:
694
+ case YARVINSN_opt_case_dispatch: // Use `switch` instruction
695
+ llrb_stack_pop(stack); // TODO: implement
696
+ break;
697
+ case YARVINSN_opt_plus:
698
+ llrb_compile_opt_insn(c, stack, "opt_plus", 2);
699
+ break;
700
+ case YARVINSN_opt_minus:
701
+ llrb_compile_opt_insn(c, stack, "opt_minus", 2);
702
+ break;
703
+ case YARVINSN_opt_mult:
704
+ llrb_compile_opt_insn(c, stack, "opt_mult", 2);
705
+ break;
706
+ case YARVINSN_opt_div:
707
+ llrb_compile_opt_insn(c, stack, "opt_div", 2);
708
+ break;
709
+ case YARVINSN_opt_mod:
710
+ llrb_compile_opt_insn(c, stack, "opt_mod", 2);
711
+ break;
712
+ case YARVINSN_opt_eq:
713
+ llrb_compile_opt_insn(c, stack, "opt_eq", 2);
714
+ break;
715
+ case YARVINSN_opt_neq: {
716
+ LLVMValueRef *args = ALLOC_N(LLVMValueRef, 6); // `xfree`d in this block.
717
+ args[1] = llrb_stack_pop(stack);
718
+ args[0] = llrb_stack_pop(stack);
719
+ args[2] = llrb_value(operands[0]);
720
+ args[3] = llrb_value(operands[1]);
721
+ args[4] = llrb_value(operands[2]);
722
+ args[5] = llrb_value(operands[3]);
723
+ llrb_stack_push(stack, LLVMBuildCall(c->builder, llrb_get_function(c->mod, "llrb_insn_opt_neq"), args, 6, ""));
724
+ xfree(args);
725
+ break;
726
+ }
727
+ case YARVINSN_opt_lt:
728
+ llrb_compile_opt_insn(c, stack, "opt_lt", 2);
729
+ break;
730
+ case YARVINSN_opt_le:
731
+ llrb_compile_opt_insn(c, stack, "opt_le", 2);
732
+ break;
733
+ case YARVINSN_opt_gt:
734
+ llrb_compile_opt_insn(c, stack, "opt_gt", 2);
735
+ break;
736
+ case YARVINSN_opt_ge:
737
+ llrb_compile_opt_insn(c, stack, "opt_ge", 2);
738
+ break;
739
+ case YARVINSN_opt_ltlt:
740
+ llrb_compile_opt_insn(c, stack, "opt_ltlt", 2);
741
+ break;
742
+ case YARVINSN_opt_aref:
743
+ llrb_compile_opt_insn(c, stack, "opt_aref", 2);
744
+ break;
745
+ case YARVINSN_opt_aset:
746
+ llrb_compile_opt_insn(c, stack, "opt_aset", 3);
747
+ break;
748
+ case YARVINSN_opt_aset_with: {
749
+ LLVMValueRef value = llrb_stack_pop(stack);
750
+ LLVMValueRef recv = llrb_stack_pop(stack);
751
+ LLVMValueRef str = llrb_call_func(c, "rb_str_resurrect", 1, llrb_value(operands[2]));
752
+
753
+ // Not using llrb_compile_funcall to prevent stack overflow
754
+ llrb_stack_push(stack, llrb_call_func(c, "rb_funcall", 5, recv,
755
+ llrb_value(rb_intern("[]=")), LLVMConstInt(LLVMInt32Type(), 2, true), str, value));
756
+ break;
757
+ }
758
+ case YARVINSN_opt_aref_with: {
759
+ LLVMValueRef str = llrb_call_func(c, "rb_str_resurrect", 1, llrb_value(operands[2]));
760
+
761
+ // Not using llrb_compile_funcall to prevent stack overflow
762
+ llrb_stack_push(stack, llrb_call_func(c, "rb_funcall", 4, llrb_stack_pop(stack),
763
+ llrb_value(rb_intern("[]")), LLVMConstInt(LLVMInt32Type(), 1, true), str));
764
+ break;
765
+ }
766
+ case YARVINSN_opt_length:
767
+ llrb_stack_push(stack, llrb_compile_funcall(c, stack, rb_intern("length"), 0));
768
+ break;
769
+ case YARVINSN_opt_size:
770
+ llrb_stack_push(stack, llrb_compile_funcall(c, stack, rb_intern("size"), 0));
771
+ break;
772
+ case YARVINSN_opt_empty_p:
773
+ llrb_stack_push(stack, llrb_compile_funcall(c, stack, rb_intern("empty?"), 0));
774
+ break;
775
+ case YARVINSN_opt_succ:
776
+ llrb_stack_push(stack, llrb_compile_funcall(c, stack, rb_intern("succ"), 0));
777
+ break;
778
+ case YARVINSN_opt_not:
779
+ llrb_stack_push(stack, llrb_compile_funcall(c, stack, '!', 0));
780
+ break;
781
+ case YARVINSN_opt_regexpmatch1: {
782
+ // Not using llrb_compile_funcall to prevent stack overflow
783
+ llrb_stack_push(stack, llrb_call_func(c, "rb_funcall", 4, llrb_stack_pop(stack),
784
+ llrb_value(rb_intern("=~")), LLVMConstInt(LLVMInt32Type(), 1, true), llrb_value(operands[0])));
785
+ break;
786
+ }
787
+ case YARVINSN_opt_regexpmatch2: {
788
+ llrb_stack_push(stack, llrb_compile_funcall(c, stack, rb_intern("=~"), 1));
789
+ break;
790
+ }
791
+ //case YARVINSN_opt_call_c_function:
792
+ case YARVINSN_getlocal_OP__WC__0: {
793
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_getlocal_level0", 2, llrb_get_cfp(c), llrb_value((lindex_t)operands[0])));
794
+ break;
795
+ }
796
+ case YARVINSN_getlocal_OP__WC__1: {
797
+ llrb_stack_push(stack, llrb_call_func(c, "llrb_insn_getlocal_level1", 2, llrb_get_cfp(c), llrb_value((lindex_t)operands[0])));
798
+ break;
799
+ }
800
+ case YARVINSN_setlocal_OP__WC__0: {
801
+ LLVMValueRef idx = llrb_value((lindex_t)operands[0]);
802
+ llrb_call_func(c, "llrb_insn_setlocal_level0", 3, llrb_get_cfp(c), idx, llrb_stack_pop(stack));
803
+ break;
804
+ }
805
+ case YARVINSN_setlocal_OP__WC__1: {
806
+ LLVMValueRef idx = llrb_value((lindex_t)operands[0]);
807
+ llrb_call_func(c, "llrb_insn_setlocal_level1", 3, llrb_get_cfp(c), idx, llrb_stack_pop(stack));
808
+ break;
809
+ }
810
+ case YARVINSN_putobject_OP_INT2FIX_O_0_C_:
811
+ llrb_stack_push(stack, llrb_value(INT2FIX(0)));
812
+ break;
813
+ case YARVINSN_putobject_OP_INT2FIX_O_1_C_:
814
+ llrb_stack_push(stack, llrb_value(INT2FIX(1)));
815
+ break;
816
+ default:
817
+ llrb_dump_cfg(c->body, c->cfg);
818
+ rb_raise(rb_eCompileError, "Unhandled insn at llrb_compile_insn: %s", insn_name(insn));
819
+ break;
820
+ }
821
+ //fprintf(stderr, " [DEBUG] llrb_compile_insn: %04d after %-27s (stack size: %d)\n", pos, insn_name(insn), stack->size);
822
+ return false;
823
+ }
824
+
825
+ static void
826
+ llrb_compile_basic_block(const struct llrb_compiler *c, struct llrb_basic_block *block, struct llrb_stack *stack)
827
+ {
828
+ if (block->compiled) return;
829
+ block->compiled = true;
830
+
831
+ // If phi node is created for this block, push it to stack.
832
+ if (block->phi) {
833
+ llrb_stack_push(stack, block->phi);
834
+ }
835
+
836
+ // Here is the actual compilation of block specified in arguments.
837
+ bool returned = false, created_br = false;
838
+ unsigned int pos = block->start;
839
+ while (pos <= block->end) {
840
+ LLVMPositionBuilderAtEnd(c->builder, block->ref); // Reset everytime to allow recursive compilation.
841
+ int insn = rb_vm_insn_addr2insn((void *)c->body->iseq_encoded[pos]);
842
+ returned = llrb_compile_insn(c, stack, pos, insn, c->body->iseq_encoded + (pos+1), &created_br);
843
+ pos += insn_len(insn);
844
+ }
845
+
846
+ // If the function is not returned yet, compiles next block.
847
+ if (!returned) {
848
+ // In the end of the function, it must be returned...
849
+ if (pos >= c->body->iseq_size) {
850
+ rb_raise(rb_eCompileError, "Compiler compiled the end of function but the function was not returned");
851
+ }
852
+
853
+ struct llrb_basic_block *next_block = llrb_find_block(c->cfg, pos);
854
+ LLVMPositionBuilderAtEnd(c->builder, block->ref); // Reset to allow recursive compilation.
855
+ if (!created_br) LLVMBuildBr(c->builder, next_block->ref);
856
+
857
+ if (next_block->incoming_size > 1 && stack->size > 0) {
858
+ llrb_push_incoming_things(c, next_block, block->ref, llrb_stack_pop(stack));
859
+ }
860
+ llrb_compile_basic_block(c, next_block, stack);
861
+ }
862
+ }
863
+
864
+ static LLVMBasicBlockRef
865
+ llrb_build_basic_block_ref(const struct llrb_compiler *c, const struct llrb_basic_block *block)
866
+ {
867
+ VALUE label = rb_str_new_cstr("label_"); // `rb_str_free`d in the end of this function.
868
+ rb_str_catf(label, "%d", block->start);
869
+
870
+ LLVMBasicBlockRef ref = LLVMAppendBasicBlock(c->func, RSTRING_PTR(label));
871
+ rb_str_free(label);
872
+ return ref;
873
+ }
874
+
875
+ static void
876
+ llrb_init_cfg_for_compile(const struct llrb_compiler *c, struct llrb_cfg *cfg)
877
+ {
878
+ for (unsigned int i = 0; i < cfg->size; i++) {
879
+ struct llrb_basic_block* block = cfg->blocks + i;
880
+ block->compiled = false;
881
+ if (!block->traversed) continue;
882
+
883
+ block->ref = llrb_build_basic_block_ref(c, block);
884
+ block->phi = 0;
885
+ }
886
+ }
887
+
888
+ // Compiles Control Flow Graph having encoded YARV instructions to LLVM IR.
889
+ static LLVMValueRef
890
+ llrb_compile_cfg(LLVMModuleRef mod, const struct rb_iseq_constant_body *body, const VALUE *new_iseq_encoded, struct llrb_cfg *cfg, const char* funcname)
891
+ {
892
+ LLVMTypeRef args[] = { LLVMInt64Type(), LLVMInt64Type() };
893
+ LLVMValueRef func = LLVMAddFunction(mod, funcname,
894
+ LLVMFunctionType(LLVMInt64Type(), args, 2, false));
895
+
896
+ const struct llrb_compiler compiler = (struct llrb_compiler){
897
+ .body = body,
898
+ .new_iseq_encoded = new_iseq_encoded,
899
+ .cfg = cfg,
900
+ .func = func,
901
+ .builder = LLVMCreateBuilder(),
902
+ .mod = mod,
903
+ };
904
+ llrb_init_cfg_for_compile(&compiler, cfg);
905
+
906
+ // To simulate YARV stack, we need to traverse CFG again here instead of loop from start to end.
907
+ struct llrb_stack stack = (struct llrb_stack){
908
+ .body = ALLOC_N(LLVMValueRef, body->stack_max), // `xfree`d in the end of this function.
909
+ .size = 0,
910
+ .max = body->stack_max,
911
+ };
912
+ llrb_compile_basic_block(&compiler, cfg->blocks, &stack);
913
+
914
+ xfree(stack.body);
915
+ return func;
916
+ }
917
+
918
+ // This sweaps memory `xmalloc`ed by llrb_create_basic_blocks.
919
+ static void
920
+ llrb_destruct_cfg(struct llrb_cfg *cfg)
921
+ {
922
+ for (unsigned int i = 0; i < cfg->size; i++) {
923
+ struct llrb_basic_block *block = cfg->blocks + i;
924
+ xfree(block->incoming_starts);
925
+ }
926
+ xfree(cfg->blocks);
927
+ }
928
+
929
+ static bool
930
+ llrb_includes_unsupported_insn(const rb_iseq_t *iseq)
931
+ {
932
+ unsigned int i = 0;
933
+ while (i < iseq->body->iseq_size) {
934
+ int insn = rb_vm_insn_addr2insn((void *)iseq->body->iseq_encoded[i]);
935
+ switch (insn) {
936
+ case YARVINSN_expandarray:
937
+ case YARVINSN_reverse:
938
+ case YARVINSN_reput:
939
+ case YARVINSN_defineclass:
940
+ case YARVINSN_once:
941
+ case YARVINSN_opt_call_c_function:
942
+ return true;
943
+ default:
944
+ break;
945
+ }
946
+ i += insn_len(insn);
947
+ }
948
+ return false;
949
+ }
950
+
951
+ bool
952
+ llrb_check_not_compilable(const rb_iseq_t *iseq)
953
+ {
954
+ // At least 3 is needed: opt_call_c_function + funcptr + leave
955
+ return iseq->body->iseq_size < 3
956
+ // We don't want to set pc to index 1. It will be funcptr. So we don't compile for such case.
957
+ || (insn_len(rb_vm_insn_addr2insn((void *)iseq->body->iseq_encoded[0])) == 1 &&
958
+ llrb_pc_change_required(rb_vm_insn_addr2insn((void *)iseq->body->iseq_encoded[1])))
959
+ || llrb_includes_unsupported_insn(iseq);
960
+ }
961
+
962
+ // llrb_create_native_func() uses a LLVM function named as `funcname` defined in returned LLVM module.
963
+ LLVMModuleRef
964
+ llrb_compile_iseq(const struct rb_iseq_constant_body *body, const VALUE *new_iseq_encoded, const char* funcname)
965
+ {
966
+ extern void llrb_parse_iseq(const struct rb_iseq_constant_body *body, struct llrb_cfg *result);
967
+ struct llrb_cfg cfg;
968
+ llrb_parse_iseq(body, &cfg);
969
+
970
+ LLVMModuleRef mod = LLVMModuleCreateWithName("llrb");
971
+ LLVMValueRef func = llrb_compile_cfg(mod, body, new_iseq_encoded, &cfg, funcname);
972
+
973
+ extern void llrb_optimize_function(LLVMModuleRef cmod, LLVMValueRef cfunc);
974
+ if (1) llrb_optimize_function(mod, func);
975
+
976
+ if (0) llrb_dump_cfg(body, &cfg);
977
+ if (0) LLVMDumpModule(mod);
978
+
979
+ llrb_destruct_cfg(&cfg);
980
+ return mod;
981
+ }
982
+
983
+ void
984
+ Init_compiler(VALUE rb_mJIT)
985
+ {
986
+ rb_eCompileError = rb_define_class_under(rb_mJIT, "CompileError", rb_eStandardError);
987
+ }