metasm 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env ruby
2
+ # This file is part of Metasm, the Ruby assembly manipulation suite
3
+ # Copyright (C) 2006-2009 Yoann GUILLOT
4
+ #
5
+ # Licence is LGPL, see LICENCE in the top-level directory
6
+
7
+
8
+ #
9
+ # this script disassembles an executable (elf/pe) and dumps the output
10
+ # ruby -h for help
11
+ #
12
+
13
+ require 'metasm'
14
+ include Metasm
15
+ require 'optparse'
16
+
17
# parse arguments
# every switch stores its value in the opts hash; :sc_cpu is the only key with a default.
# parse!(ARGV) removes the switches it recognizes from ARGV, the positional arguments stay there.
opts = { :sc_cpu => 'Ia32' }
OptionParser.new do |o|
	o.banner = 'Usage: disassemble.rb [options] <executable> [<entrypoints>]'

	# plain boolean flags
	o.on('--no-data', 'do not display data bytes') { opts[:nodata] = true }
	o.on('--no-data-trace', 'do not backtrace memory read/write accesses') { opts[:nodatatrace] = true }
	o.on('--debug-backtrace', 'enable backtrace-related debug messages (very verbose)') { opts[:debugbacktrace] = true }

	# switches taking one value
	o.on('-c <header>', '--c-header <header>', 'read C function prototypes (for external library functions)') { |header| opts[:cheader] = header }
	o.on('-o <outfile>', '--output <outfile>', 'save the assembly listing in the specified file (defaults to stdout)') { |path| opts[:outfile] = path }
	o.on('--cpu <cpu>', 'the CPU class to use for a shellcode (Ia32, X64, ...)') { |cpu| opts[:sc_cpu] = cpu }
	o.on('--exe <exe_fmt>', 'the executable file format to use (PE, ELF, ...)') { |fmt| opts[:exe_fmt] = fmt }
	o.on('--rebase <addr>', 'rebase the loaded file to <addr>') { |addr| opts[:rebase] = Integer(addr) }
	o.on('-s <savefile>', 'save the disassembler state after disasm') { |path| opts[:savefile] = path }

	# switches that may be repeated: values are accumulated in an array
	o.on('-S <addrlist>', '--stop <addrlist>', '--stopaddr <addrlist>', 'do not disassemble past these addresses') { |list|
		# comma-separated list, merged without duplicates
		opts[:stopaddr] = (opts[:stopaddr] || []) | list.split(',')
	}
	o.on('-P <plugin>', '--plugin <plugin>', 'load a metasm disassembler plugin') { |plugin| (opts[:plugin] ||= []) << plugin }
	o.on('--post-plugin <plugin>', 'load a metasm disassembler plugin after disassembly is finished') { |plugin| (opts[:post_plugin] ||= []) << plugin }
	o.on('-e <code>', '--eval <code>', 'eval a ruby code') { |code| (opts[:hookstr] ||= []) << code }

	o.on('--benchmark') { opts[:benchmark] = true }
	o.on('--decompile') { opts[:decompile] = true }
	o.on('--map <mapfile>') { |mapfile| opts[:map] = mapfile }
	o.on('-a', '--autoload', 'loads all relevant files with same filename (.h, .map..)') { opts[:autoload] = true }
	o.on('--fast', 'use disassemble_fast (no backtracking)') { opts[:fast] = true }

	# these two act on the interpreter globals directly
	o.on('-v', '--verbose') { $VERBOSE = true }
	o.on('-d', '--debug') { $DEBUG = $VERBOSE = true }
end.parse!(ARGV)
42
+
43
# first positional argument: the file to disassemble, or "live:<process>"
exename = ARGV.shift
# fail early with the usage line instead of an obscure error from the file loader
abort 'Usage: disassemble.rb [options] <executable> [<entrypoints>]' if not exename

t0 = Time.now if opts[:benchmark]

# load the file
if exename =~ /^live:(.*)/
	# live target: look the process up through OS.current and decode its memory as a Shellcode
	raise 'no such live target' if not target = OS.current.find_process($1)
	p target if $VERBOSE
	exe = Shellcode.decode(target.memory, Metasm.const_get(opts[:sc_cpu]).new)
else
	# --exe forces the file format class, otherwise AutoExe is used with a shellcode fallback built on the --cpu class
	exefmt = opts[:exe_fmt] ? Metasm.const_get(opts[:exe_fmt]) : AutoExe.orshellcode { Metasm.const_get(opts[:sc_cpu]).new }
	exefmt = exefmt.withcpu(Metasm.const_get(opts[:sc_cpu]).new) if opts[:exe_fmt] == 'Shellcode' and opts[:sc_cpu]
	exe = exefmt.decode_file(exename)
	exe.disassembler.rebase(opts[:rebase]) if opts[:rebase]
	if opts[:autoload]
		# strip a 1 to 3 characters extension, then pick up the sibling .map / .h / .rb files that exist
		# explicit --map / --c-header values take precedence over the autoloaded ones
		basename = exename.sub(/\.\w\w?\w?$/, '')
		opts[:map] ||= basename + '.map' if File.exist?(basename + '.map')
		opts[:cheader] ||= basename + '.h' if File.exist?(basename + '.h')
		(opts[:plugin] ||= []) << (basename + '.rb') if File.exist?(basename + '.rb')
	end
end
# set options
dasm = exe.init_disassembler
66
# converts an address given on the command line:
#  - starts with a digit and has an 'h' further on: read as hexadecimal
#  - any other string starting with a digit: converted by Integer()
#  - everything else: handed to dasm.normalize
makeint = lambda do |addr|
	if /^[0-9].*h/ === addr
		addr.to_i(16)
	elsif /^[0-9]/ === addr
		Integer(addr)
	else
		dasm.normalize(addr)
	end
end
73
# forward the command-line options to the disassembler
dasm.load_map(opts[:map]) if opts[:map]
dasm.parse_c_file(opts[:cheader]) if opts[:cheader]
if opts[:nodatatrace]
	dasm.backtrace_maxblocks_data = -1
end
if opts[:debugbacktrace]
	dasm.debug_backtrace = true
end
# stop addresses are stored as 'true' entries in dasm.decoded
opts[:stopaddr].to_a.each { |stop| dasm.decoded[makeint[stop]] = true }
opts[:plugin].to_a.each { |plugin| dasm.load_plugin(plugin) }
# -e snippets are evaluated here, with the script local variables in scope
# NOTE(review): eval of command-line supplied code is deliberate, never feed it untrusted input
opts[:hookstr].to_a.each { |f| eval f }
80
+
81
t1 = Time.now if opts[:benchmark]
# do the work
begin
	dasm_method = (opts[:fast] ? :disassemble_fast_deep : :disassemble)
	# the remaining command-line arguments are entrypoints; an empty list means no argument at all
	entrypoints = ARGV.map { |addr| makeint[addr] }
	exe.send(dasm_method, *entrypoints)
rescue Interrupt => e
	# ^C stops the disassembly but the script carries on with what was found so far
	puts e, e.backtrace
end
t2 = Time.now if opts[:benchmark]

if opts[:decompile]
	# the state is saved a first time before the decompilation starts
	if opts[:savefile]
		dasm.save_file(opts[:savefile])
	end
	dasm.decompile(*dasm.entrypoints)
	tdc = Time.now if opts[:benchmark]
end

opts[:post_plugin].to_a.each { |plugin| dasm.load_plugin(plugin) }

if opts[:savefile]
	dasm.save_file(opts[:savefile])
end
104
+
105
# output
# with -o the listing goes to the file; otherwise it goes to stdout, unless -s was given,
# in which case nothing is printed
if opts[:outfile]
	File.open(opts[:outfile], 'w') do |fd|
		if opts[:decompile]
			# decompiled C first, the assembly listing follows inside an "#if 0" section
			fd.puts dasm.c_parser
			fd.puts "#if 0"
		end
		dasm.dump(!opts[:nodata]) { |line| fd.puts line }
		fd.puts "#endif" if opts[:decompile]
	end
elsif !opts[:savefile]
	if opts[:decompile]
		puts dasm.c_parser
	else
		dasm.dump(!opts[:nodata])
	end
end

t3 = Time.now if opts[:benchmark]
122
+
123
# formats a duration in seconds for the benchmark report:
#  more than 5400s: hours and minutes ("1h30mn")
#  more than 90s:   minutes and seconds ("1mn31s")
#  otherwise:       seconds with 2 decimals ("1.50s")
todate = lambda do |f|
	if f > 90
		secs = f.to_i
		if f > 5400
			hours, rest = secs.divmod(3600)
			"#{hours}h#{rest / 60}mn"
		else
			mins, rest = secs.divmod(60)
			"#{mins}mn#{rest}s"
		end
	else
		('%.02f' % f) + 's'
	end
end
132
+
133
# benchmark report: the decomp line only shows when a decompilation was timed (tdc set)
if opts[:benchmark]
	report = "durations\n load #{todate[t1-t0]}\n dasm #{todate[t2-t1]}"
	report << "\n decomp " + todate[tdc-t2] if tdc
	report << "\n output #{todate[t3-(tdc||t2)]}\n total #{todate[t3-t0]}"
	puts report
end
@@ -0,0 +1,95 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ # Original script and idea by Alexandre GAZET
4
+ #
5
+ # Licence is LGPL, see LICENCE in the top-level directory
6
+
7
+
8
+ #
9
+ # this script will load an upx-packed windows executable, find its
10
+ # original entrypoint by disassembling the UPX stub, set breakpoint on it,
11
+ # run the program, and dump the loaded image to an executable PE.
12
+ #
13
+ # usage: dump_upx.rb <packed.exe> [<dumped.exe>] [<rva iat>]
14
+ #
15
+
16
+ require 'metasm'
17
+ include Metasm
18
+
19
# Unpacks an UPX-packed windows executable:
#  - loads the file
#  - finds the original entrypoint (oep) by disassembling the UPX stub
#  - runs the program until the oep
#  - dumps the memory image to a PE file
class UPXUnpacker
	# file:     path of the packed executable
	# dumpfile: path of the PE file to write, 'upx-dumped.exe' when nil
	# iat_rva:  optional address of the import table, forwarded to LoadedPE.memdump
	#           a value above the image base is taken as a VA and converted to an RVA
	# raises a RuntimeError when no oep is found
	# the whole unpacking runs from the constructor
	def initialize(file, dumpfile, iat_rva=nil)
		@dumpfile = dumpfile || 'upx-dumped.exe'
		@iat = iat_rva

		puts 'disassembling UPX loader...'
		pe = PE.decode_file(file)
		@oep = find_oep(pe)
		raise 'cant find oep...' if not @oep
		puts "oep found at #{Expression[@oep]}"
		@baseaddr = pe.optheader.image_base
		# the iat is optional: only convert it when it was actually given
		@iat -= @baseaddr if @iat and @iat > @baseaddr	# va => rva

		@dbg = OS.current.create_process(file).debugger
		puts 'running...'
		debugloop
	end

	# disassemble the upx stub to find a cross-section jump (to the real entrypoint)
	# returns the normalized target address, or nil if there is no such jump
	def find_oep(pe)
		dasm = pe.disassemble_fast 'entrypoint'

		jmp = dasm.decoded.find { |addr, di|
			# check only once per basic block
			next if not di.block_head?
			b = di.block
			# our target has only one follower
			next if b.to_subfuncret.to_a.length != 0 or b.to_normal.to_a.length != 1
			to = b.to_normal.first
			# ignore jump to unmapped address
			next if not s = dasm.get_section_at(to)
			# ignore jump to same section
			next if dasm.get_section_at(di.address) == s

			# gotcha !
			true
		}
		return if not jmp

		# now jmp is a couple [addr, di], we extract and normalize the oep from there
		dasm.normalize(jmp[1].block.to_normal.first)
	end

	# runs the debuggee with a breakpoint on the oep, the dump is done from the breakpoint callback
	def debugloop
		# set up a oneshot breakpoint on oep
		@dbg.hwbp(@oep, :x, 1, true) { breakpoint_callback }
		@dbg.run_forever
		puts 'done'
	end

	# called when the oep is reached: writes the memory image to @dumpfile
	# the debuggee is killed afterwards, even if the dump fails
	def breakpoint_callback
		puts 'breakpoint hit !'

		# dump the process
		# create a genuine PE object from the memory image
		dump = LoadedPE.memdump @dbg.memory, @baseaddr, @oep, @iat

		# the UPX loader unpacks everything in sections marked read-only in the PE header, make them writeable
		dump.sections.each { |s| s.characteristics |= ['MEM_WRITE'] }

		# write the PE file to disk
		dump.encode_file @dumpfile

		puts 'dump complete'
	ensure
		# kill the process
		@dbg.kill
	end
end
91
+
92
# run only when the file is executed directly
if $0 == __FILE__
	# args: packed [unpacked] [iat rva]
	packed = ARGV.shift
	unpacked = ARGV.shift
	# a missing or non-numeric iat argument means "no iat"
	iat_rva = begin
		Integer(ARGV.shift)
	rescue StandardError
		nil
	end
	UPXUnpacker.new(packed, unpacked, iat_rva)
end
@@ -0,0 +1,1929 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ # This sample hacks in the ruby interpreter to allow dynamic loading of shellcodes as object methods
7
+ # It also allows raw modifications to the ruby interpreter memory, for all kinds of purposes
8
+ # Includes methods to dump the ruby parser AST from the interpreter memory
9
+ # elf/linux/x86 only
10
+
11
+ require 'metasm'
12
+
13
+
14
+ module Metasm
15
# Pokes into the memory of the running ruby interpreter:
# installs raw machine code as method bodies and reads back the AST of existing methods
class RubyHack < DynLdr
	# basic C defs for ruby AST - ruby1.8 only !
	RUBY_INTERN_NODE = <<EOS
struct node {
long flags;
char *file;
long a1;
long a2;
long a3;
};
#define FL_USHIFT 11
#define nd_type(n) ((((struct node*)n)->flags >> FL_USHIFT) & 0xff)
EOS
	# node type names, indexed by the type number stored in the node flags
	NODETYPE = [
		:method, :fbody, :cfunc, :scope, :block,
		:if, :case, :when, :opt_n, :while,
		:until, :iter, :for, :break, :next,
		:redo, :retry, :begin, :rescue, :resbody,
		:ensure, :and, :or, :not, :masgn,
		:lasgn, :dasgn, :dasgn_curr, :gasgn, :iasgn,
		:cdecl, :cvasgn, :cvdecl, :op_asgn1, :op_asgn2,
		:op_asgn_and, :op_asgn_or, :call, :fcall, :vcall,
		:super, :zsuper, :array, :zarray, :hash,
		:return, :yield, :lvar, :dvar, :gvar, # 50
		:ivar, :const, :cvar, :nth_ref, :back_ref,
		:match, :match2, :match3, :lit, :str,
		:dstr, :xstr, :dxstr, :evstr, :dregx,
		:dregx_once, :args, :argscat, :argspush, :splat,
		:to_ary, :svalue, :block_arg, :block_pass, :defn,
		:defs, :alias, :valias, :undef, :class,
		:module, :sclass, :colon2, :colon3, :cref,
		:dot2, :dot3, :flip2, :flip3, :attrset,
		:self, :nil, :true, :false, :defined,
		:newline, :postexe, :alloca, :dmethod, :bmethod, # 100
		:memo, :ifunc, :dsym, :attrasgn, :last
	]

	new_api_c 'void rb_define_method(uintptr_t, char *, uintptr_t (*)(), int)'
	new_api_c 'void *rb_method_node(uintptr_t, unsigned id)'

	class << self
		# makes the memory holding code 'rwx' and registers it as the implementation of klass#meth
		def set_class_method_raw(klass, meth, code, nparams)
			memory_perm(str_ptr(code), code.length, 'rwx')
			rb_define_method(rb_obj_to_value(klass), meth, code, nparams)
		end

		# returns what rb_method_node gives for klass#meth
		# raises if klass is not a Module
		def get_method_node_ptr(klass, meth)
			unless klass.kind_of? Module
				raise "#{klass.inspect} is not a class"
			end
			rb_method_node(rb_obj_to_value(klass), meth.to_sym.to_i)
		end

		# sets up rawopcodes as the method implementation for class klass
		# rawopcodes must implement the expected ABI or things will break horribly
		# this method is VERY UNSAFE, and breaks everything put in place by the ruby interpreter
		# use with EXTREME CAUTION
		# nargs  arglist
		# -2     self, arg_ary
		# -1     argc, VALUE*argv, self
		# >=0    self, arg0, arg1..
		# nargs defaults to the arity of the method currently defined under that name
		def set_method_binary(klass, methodname, raw, nargs=nil)
			nargs ||= klass.instance_method(methodname).arity
			if raw.kind_of? EncodedData
				# resolve the external symbols against the address of the string holding the code
				bd = raw.binding(str_ptr(raw.data))
				raw.reloc_externals.uniq.each { |ext|
					addr = sym_addr(0, ext)
					bd[ext] = addr
					raise "unknown symbol #{ext}" if not addr
				}
				raw.fixup(bd)
				raw = raw.data
			end
			# keep a reference to the code string in a class variable
			@@prevent_gc ||= {}
			@@prevent_gc[[klass, methodname]] = raw
			set_class_method_raw(klass, methodname.to_s, raw, nargs)
		end

		# same as set_method_binary but with an object and not a class
		def set_singleton_method_binary(obj, *a)
			singleton = (class << obj ; self ; end)
			set_method_binary(singleton, *a)
		end

		# returns the AST of klass#meth, as nested arrays (see read_node)
		def read_method_ast(klass, meth)
			read_node(get_method_node_ptr(klass, meth))
		end

		# same as read_method_ast, for a method of the singleton class of klass
		def read_singleton_method_ast(klass, meth)
			read_method_ast((class << klass ; self ; end), meth)
		end

		# reads the AST node at address ptr from the interpreter memory
		# returns nil for ptr 0 or 4, otherwise an array [type, ...] whose layout depends on the node type
		# cur is the list being built when following a chain of :block/:array/:hash/:cref nodes
		def read_node(ptr, cur=nil)
			return if ptr == 0 or ptr == 4

			type = NODETYPE[(memory_read_int(ptr) >> 11) & 0xff]
			v1 = memory_read_int(ptr+8)
			v2 = memory_read_int(ptr+12)
			v3 = memory_read_int(ptr+16)

			case type
			when :block, :array, :hash
				# chained nodes, flattened to one list: v1 = element, v3 = next link
				cur = [type] if not cur or cur[0] != type
				cur << read_node(v1)
				nxt = read_node(v3, cur)
				raise "block->next = #{nxt.inspect}" if nxt and nxt[0] != type
				cur
			when :cref
				# chained like :block, the elements are ruby objects
				cur = [type] if not cur or cur[0] != type
				cur << rb_value_to_obj(v1) if v1 != 0 and v1 != 4
				nxt = read_node(v3, cur)
				raise "block->next = #{nxt.inspect}" if nxt and nxt[0] != type
				cur
			when :newline
				read_node(v3)	# debug/trace usage only
			when :cfunc
				# unsigned 32/64bit encodings of -1 and -2
				arity = case v2
					when 0xffffffff, 0xffffffffffffffff then -1
					when 0xfffffffe, 0xfffffffffffffffe then -2
					else v2
					end
				[type, {:fptr => v1,	# c func pointer
					:arity => arity}]
			when :scope
				localnr = (v1 != 0 && v1 != 4) ? memory_read_int(v1) : 0	# nr of local vars (+2 for $_/$~)
				cref = read_node(v2)[1..-1]	# node, starting point for const/@@var resolution
				[type, {:localnr => localnr, :cref => cref}, read_node(v3)]
			when :call, :fcall, :vcall
				[type, read_node(v1), v2.id2name, read_node(v3)]
			when :dstr
				ret = [type, [:str, rb_value_to_obj(v1)]]
				if parts = read_node(v3)
					raise "#{ret.inspect} with args != array: #{parts.inspect}" if parts[0] != :array
					ret.concat parts[1..-1]
				end
				ret
			when :zarray
				[:array]
			when :lasgn
				[type, v3, read_node(v2)]
			when :iasgn, :dasgn, :dasgn_curr, :gasgn, :cvasgn
				[type, v1.id2name, read_node(v2)]
			when :attrasgn
				[type, ((v1 == 1) ? :self : read_node(v1)), v2.id2name, read_node(v3)]
			when :lvar
				[type, v3]
			when :ivar, :dvar, :gvar, :cvar, :const, :attrset
				[type, v1.id2name]
			when :str
				# cannot use _id2ref here, probably the parser does not use standard alloced objects
				[type, memory_read(memory_read_int(v1+12), memory_read_int(v1+16))]
			when :lit
				[type, rb_value_to_obj(v1)]
			when :args	# specialcased by rb_call0, invalid in rb_eval
				cnt = v3		# nr of required args, copied directly to local_vars
				opt = read_node(v1)	# :block to execute for each missing arg / with N optargs specified, skip N 1st statements
				rest = read_node(v2)	# catchall arg in def foo(rq1, rq2, *rest)
				[type, cnt, opt, rest]
			when :case
				# [:case, var_test, [:when, cnd, action, [:when, cnd2, action2, else]]]
				# => [:case, var_test, [:when, cnd, action], [:when, cnd2, action], else]
				cs = [type, read_node(v1), read_node(v2)]
				while cs[-1][0] == :when and cs[-1][3]
					cs << cs[-1].pop
				end
				cs
			when :if, :masgn, :when, :iter, :block_pass
				# three subnodes:
				# :masgn      multiple assignment: a, b = 42 / lambda { |x, y| }.call(1, 2)
				#             v3 = remainder storage (a, b, *c = ary => v3=c)
				# :when       [:when, [:array, [test]], then, else]
				# :iter       save a block for the following funcall
				#             v1 = assignments with nil, not realized, just to store the arg list (multi args -> :masgn)
				#             v2 = the body statements (multi -> :block)
				#             v3 = the stuff which is passed the block, probably a :call
				# :block_pass [args, block, receiver]: foo(bar, &baz) => [:bpass, [:array, bar], [:lvar, baz], [:call, 'foo', bar]] (args in v1&v3!)
				[type, read_node(v1), read_node(v2), read_node(v3)]
			when :and, :or, :dot2, :dot3
				[type, read_node(v1), read_node(v2)]	# :and/:or shortcircuit
			when :while, :until, :argscat
				[type, read_node(v1), read_node(v2), v3]
			when :return, :break, :next, :defined, :to_ary, :splat
				[type, read_node(v1)]
			when :not, :evstr
				[type, read_node(v2)]
			when :nil, :true, :false, :self, :redo, :retry
				[type]
			when :colon2
				[type, read_node(v1), v2.id2name]
			when :colon3	# ::Stuff
				[type, v2.id2name]
			when :method
				[type, v1, read_node(v2), v3]
			when :alias
				[type, v1, v2, v3]	# ?
			when :block_arg
				[type, v1.id2name, v2, v3]
			when :ensure
				[type, read_node(v1), v2, read_node(v3)]
			else
				puts "unhandled #{type.inspect}"
				[type, v1, v2, v3]
			end
		end
	end	# class << self
end
231
+
232
+ # a ruby2c C generator for use in the current ruby interpreter
233
+ # generates C suitable for shellcode compilation & insertion in the current interpreter
234
+ # has hardcoded addresses etc
235
+ class RubyLiveCompiler
236
+ attr_accessor :cp
237
+
238
+ RUBY_H = <<EOS
239
+ #{DynLdr::RUBY_H}
240
+
241
+ VALUE rb_ivar_get(VALUE, unsigned);
242
+ VALUE rb_ivar_set(VALUE, unsigned, VALUE);
243
+ VALUE rb_ivar_defined(VALUE, unsigned);
244
+ VALUE rb_cvar_get(VALUE, unsigned);
245
+ VALUE rb_cvar_set(VALUE, unsigned, VALUE, int);
246
+ VALUE rb_gv_get(const char*);
247
+ VALUE rb_gv_set(const char*, VALUE);
248
+
249
+ VALUE rb_ary_new(void);
250
+ VALUE rb_ary_new4(long, VALUE*);
251
+ VALUE rb_ary_push(VALUE, VALUE);
252
+ VALUE rb_ary_pop(VALUE);
253
+ VALUE rb_ary_shift(VALUE);
254
+ VALUE rb_hash_new(void);
255
+ VALUE rb_hash_aset(VALUE, VALUE, VALUE);
256
+ VALUE rb_str_new(const char*, long);
257
+ VALUE rb_str_new2(const char*);
258
+ VALUE rb_str_cat2(VALUE, const char*);
259
+ VALUE rb_str_concat(VALUE, VALUE);
260
+ VALUE rb_str_append(VALUE, VALUE);
261
+ VALUE rb_obj_as_string(VALUE);
262
+ VALUE rb_range_new(VALUE, VALUE, int exclude_end);
263
+ VALUE rb_Array(VALUE); // :splat
264
+ VALUE rb_ary_to_ary(VALUE);
265
+ VALUE rb_hash_aref(VALUE, VALUE);
266
+ VALUE rb_funcall3(VALUE, unsigned, int, VALUE*);
267
+ VALUE rb_singleton_class(VALUE);
268
+ VALUE rb_block_proc(void);
269
+ void rb_define_method(VALUE, char *, VALUE (*)(), int);
270
+ void *rb_method_node(VALUE, unsigned);
271
+ EOS
272
+
273
# raised by the compiler on ruby constructs it does not handle
class Fail < RuntimeError; end
275
+
276
# compiles the listed methods of klass and installs the resulting machine code in their place
# a single compiler instance is reused across calls
# methods for which compile returns nothing are left untouched
# returns self
def self.compile(klass, *methlist)
	@rcp ||= new
	methlist.each do |meth|
		cname = @rcp.compile(RubyHack.read_method_ast(klass, meth), klass, meth)
		if cname
			c_src = @rcp.cp.dump_definition(cname)
			RubyHack.set_method_binary(klass, meth, RubyHack.compile_c(c_src).encoded)
		end
	end
	self
end
287
+
288
# returns the C source held by the parser:
# the definition of m when given, everything otherwise
def dump(m=nil)
	if m
		@cp.dump_definition(m)
	else
		@cp.to_s
	end
end
291
+
292
+ attr_accessor :optim_hint
293
# cp is the C parser to work with, a new one is created from the host cpu when omitted
# the parser is fed the RUBY_H declarations right away
def initialize(cp=nil)
	@cp = (cp or DynLdr.host_cpu.new_cparser)
	@cp.parse(RUBY_H)
	@iter_break = nil
	@optim_hint = {}
end
299
+
300
+ # convert a ruby AST to a new C function
301
+ # returns the new function name
302
def compile(ast, klass, meth, singleton=false)
	# ast: method AST as returned by RubyHack.read_method_ast, nil => nothing to do
	# handled ASTs: :ivar (attr_reader), :attrset (attr_writer), :scope (regular method)
	# anything else raises a RuntimeError
	return if not ast

	# TODO handle arbitrary block/yield constructs
	# TODO analyse to find/optimize numeric locals that never need a ruby VALUE (ie native int vs INT2FIX)
	# TODO detect block/closure exported out of the func & abort compilation

	@klass = klass
	@meth = meth
	@meth_singleton = singleton

	# declare an empty C function, its body is filled in below
	mname = escape_varname("m_#{@klass}#{singleton ? '.' : '#'}#{@meth}".gsub('::', '_'))
	@cp.parse "static void #{mname}(VALUE self) { }"
	@cur_cfunc = @cp.toplevel.symbol[mname]
	@cur_cfunc.type.type = value	# return type = VALUE, w/o 'missing return statement' warning

	@scope = @cur_cfunc.initializer

	ret = case ast[0]
		when :ivar	# attr_reader
			fcall('rb_ivar_get', rb_self, rb_intern(ast[1]))
		when :attrset	# attr_writer
			compile_args(@cur_cfunc, [nil, 1])
			fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), local(2))
		when :scope	# standard ruby function
			@cref = ast[1][:cref]
			body = ast[2]
			if body and body[0] == :block and body[1] and body[1][0] == :args
				compile_args(@cur_cfunc, body[1])
			end
			# the value of an instance 'initialize' is not computed, nil is returned instead
			want_value = !(meth.to_s == 'initialize' and not singleton)
			result = ast_to_c(body, @scope, want_value)
			want_value ? result : rb_nil
		#when :cfunc	# native ruby extension
		else
			raise "unhandled function ast #{ast.inspect}"
		end

	@scope.statements << C::Return.new(ret)

	mname
end
345
+
346
+ # return the arity of method 'name' on self
347
def method_arity(name=@meth)
	# singleton methods are looked up on the class object itself, the others among its instance methods
	target = if @meth_singleton
		@klass.method(name)
	else
		@klass.instance_method(name)
	end
	target.arity
end
350
+
351
+ # find the scope where constname is defined from @cref
352
def resolve_const_owner(constname)
	# names are compared as strings, so symbols and strings both match
	# returns the first entry of @cref listing the constant, nil if none does
	wanted = constname.to_s
	@cref.find { |scope| scope.constants.any? { |c| c.to_s == wanted } }
end
355
+
356
+ # checks if ast maps to a constant, returns it if it does
357
+ def check_const(ast)
358
+ case ast[0]
359
+ when :const
360
+ resolve_const_owner(ast[1])
361
+ when :colon2
362
+ if cst = check_const(ast[1])
363
+ cst.const_get(ast[2])
364
+ end
365
+ when :colon3
366
+ ::Object.const_get(ast[2])
367
+ end
368
+ end
369
+
370
# declares the arguments of the C function func from the :args node of the ruby method
# the calling convention is picked from the arity of the method being compiled
def compile_args(func, args)
	arity = method_arity
	if arity == -1	# args[1] == 0 and (args[2] or args[3])
		compile_args_m1(func, args)
	elsif arity == -2	# args[1] > 0 and (args[2] or args[3])
		compile_args_m2(func, args)
	else
		# fixed arity = args[1]: VALUE func(VALUE self, VALUE local_2, VALUE local_3)
		args[1].times do |idx|
			arg = C::Variable.new("local_#{idx+2}", value)
			@scope.symbol[arg.name] = arg
			func.type.args << arg
		end
	end
end
385
+
386
+ # update func prototype to reflect arity -1
387
+ # VALUE func(int argc, VALUE *argv, VALUE self)
388
def compile_args_m1(func, args)
	# args is the :args node: [:args, nr of required args, optional args (:block of :lasgn) or nil, rest arg (:lasgn) or nil]
	# raises Fail when the optional or rest args do not have the expected shape
	c = C::Variable.new("arg_c", C::BaseType.new(:int, :unsigned))
	v = C::Variable.new("arg_v", C::Pointer.new(value))
	@scope.symbol[c.name] = c
	@scope.symbol[v.name] = v
	# prepended in reverse order: the prototype ends up as (arg_c, arg_v, self)
	func.type.args.unshift v
	func.type.args.unshift c

	args[1].times { |i|
		local(i+2, C::CExpression[v, :'[]', [i]])
	}

	if args[2]
		# [:block, [:lasgn, 2, [:lit, 4]]]
		raise Fail, "unhandled vararglist #{args.inspect}" if args[2][0] != :block
		args[2][1..-1].each_with_index { |a, i|
			raise Fail, "unhandled arg #{a.inspect}" if a[0] != :lasgn
			# take the value from argv when the caller supplied it, evaluate the default otherwise
			cnd = C::CExpression[c, :>, i]
			thn = C::CExpression[local(a[1], :none), :'=', [v, :'[]', [i]]]
			els = C::Block.new(@scope)
			ast_to_c(a, els, false)
			@scope.statements << C::If.new(cnd, thn, els)
		}
	end

	if args[3]
		raise Fail, "unhandled vararglist3 #{args.inspect}" if args[3][0] != :lasgn
		# args[2] is nil for a method with a rest argument but no optional one (def foo(*rest)),
		# when present its first element is the :block tag, hence the -1
		optcount = args[2] ? args[2].length - 1 : 0
		skiplen = args[1] + optcount
		# rest = whatever is left in argv after the named args, or an empty array
		alloc = fcall('rb_ary_new4', [c, :-, [skiplen]], [v, :+, [skiplen]])
		local(args[3][1], C::CExpression[[c, :>, skiplen], :'?:', [alloc, fcall('rb_ary_new')]])
	end
end
420
+
421
# Update the func prototype to reflect arity -2:
#   VALUE func(VALUE self, VALUE arg_array)
#
# Every mandatory argument is shifted out of the array into its local;
# optional arguments are shifted too while the array is not empty, and get
# their default value otherwise; a rest argument receives what is left of
# the array. Raises Fail on an unhandled args node shape.
def compile_args_m2(func, args)
  arglist = C::Variable.new("arglist", value)
  @scope.symbol[arglist.name] = arglist
  func.type.args << arglist

  args[1].times do |idx|
    local(idx + 2, fcall('rb_ary_shift', arglist))
  end

  # populate arguments with default values
  if args[2]
    # [:block, [:lasgn, 2, [:lit, 4]]]
    raise Fail, "unhandled vararglist #{args.inspect}" if args[2][0] != :block
    args[2][1..-1].each do |opt|
      raise Fail, "unhandled arg #{opt.inspect}" if opt[0] != :lasgn
      from_list = C::CExpression[local(opt[1], :none), :'=', fcall('rb_ary_shift', arglist)]
      from_default = C::Block.new(@scope)
      ast_to_c([:lasgn, opt[1], opt[2]], from_default, false)
      @scope.statements << C::If.new(rb_ary_len(arglist), from_list, from_default)
    end
  end

  if args[3]
    raise Fail, "unhandled vararglist3 #{args.inspect}" if args[3][0] != :lasgn
    local(args[3][1], C::CExpression[arglist])
  end
end
450
+
451
# Compile a case/when statement.
# Creates a real C switch() for the 'when' clauses holding only Fixnum/Range
# literals, and a cascade of if (x === var) for all the others.
# XXX will get the wrong order for "case x; when 1; when Fixnum; when 3;" ...
#
# The generated code looks like
#   var = stuff_to_test()
#   if (var & 1)
#     switch (var >> 1) {
#     case 12:
#       stuff();
#       break;
#     default:
#       goto default_case;
#     }
#   else
#   default_case:
#     if (var == true.object_id || rb_test(rb_funcall(bla, '===', var)))
#       foo();
#     else {
#       default();
#     }
#
# Returns the variable receiving the value of the case (may be nil when
# want_value is false and the tested expression is already a variable).
def compile_case(ast, scope, want_value)
  if want_value == true
    ret = get_new_tmp_var('case', want_value)
    want_value = ret
  elsif want_value
    ret = want_value
  end

  # the tested value must live in a variable, it is referenced many times
  var = ast_to_c(ast[1], scope, want_value || true)
  if not var.kind_of?(C::Variable)
    ret ||= get_new_tmp_var('case', want_value)
    scope.statements << C::CExpression[ret, :'=', var]
    var = ret
  end

  # compile the body of one clause in its own block, storing its value in ret if needed
  compile_body = lambda { |body_ast|
    blk = C::Block.new(scope)
    val = ast_to_c(body_ast, blk, want_value)
    blk.statements << C::CExpression[ret, :'=', val] if want_value and val != ret
    blk
  }

  # the scope to put all case int in
  body_int = C::Block.new(scope)
  # the scope to put the if (cs === var) cascade
  body_other_head = body_other = nil
  default = nil

  ast[2..-1].each { |cs|
    if cs[0] == :when
      raise Fail if cs[1][0] != :array
      conds = cs[1][1..-1]

      if conds.all? { |cd| cd[0] == :lit and (cd[1].kind_of?(Fixnum) or cd[1].kind_of?(Range)) }
        # numeric case, add a case to body_int
        conds.each { |cd|
          if cd[1].kind_of?(Range)
            b = cd[1].begin
            e = cd[1].end
            e -= 1 if cd[1].exclude_end?
            raise Fail unless b.kind_of?(Integer) and e.kind_of?(Integer)
            body_int.statements << C::Case.new(b, e, nil)
          else
            body_int.statements << C::Case.new(cd[1], nil, nil)
          end
        }
        cb = compile_body.call(cs[2])
        cb.statements << C::Break.new
        body_int.statements << cb

      else
        # non-numeric (or mixed) case, add if ( cs === var )
        cnd = nil
        conds.each { |cd|
          if (cd[0] == :lit and (cd[1].kind_of?(Fixnum) or cd[1].kind_of?(Symbol))) or
              [:nil, :true, :false].include?(cd[0])
            # true C equality
            cd = C::CExpression[var, :==, ast_to_c(cd, scope)]
          else
            # own block for ast_to_c to honor lazy evaluation
            tb = C::Block.new(scope)
            test = rb_test(rb_funcall(ast_to_c(cd, tb), '===', var), tb)
            # discard own block unless needed
            if tb.statements.empty?
              cd = test
            else
              tb.statements << test
              cd = C::CExpression[tb, value]
            end
          end
          cnd = (cnd ? C::CExpression[cnd, :'||', cd] : cd)
        }
        cb = compile_body.call(cs[2])

        fu = C::If.new(cnd, cb, nil)

        # chain to the previous 'if' of the cascade
        if body_other
          body_other.belse = fu
        else
          body_other_head = fu
        end
        body_other = fu
      end

    else
      # default case statement
      default = compile_body.call(cs)
    end
  }

  # if we use the value of the case, we must add an 'else: nil'
  if want_value and not default
    default = C::Block.new(scope)
    default.statements << C::CExpression[ret, :'=', rb_nil]
  end

  # assemble everything
  assembled =
    if body_int.statements.empty?
      if body_other
        body_other.belse = default
        body_other_head
      else
        raise Fail, "empty case? #{ast.inspect}" if not default
        default
      end
    else
      if body_other_head
        @default_label_cnt ||= 0
        dfl = "default_label_#{@default_label_cnt += 1}"
        body_other_head = C::Label.new(dfl, body_other_head)
        body_int.statements << C::Case.new('default', nil, C::Goto.new(dfl))
        body_other.belse = default if default
      end
      body_int = C::Switch.new(C::CExpression[var, :>>, 1], body_int)
      C::If.new(C::CExpression[var, :&, 1], body_int, body_other_head)
    end
  scope.statements << assembled

  ret
end
591
+
592
# Build the C::CExpression calling the C function fname:
#   C::CExpr[toplevel.symbol[fname], :funcall, args]
# Integer and String arguments are wrapped in an Array (cast to CExpr).
# Raises a RuntimeError if fname has no prototype in the toplevel scope.
def fcall(fname, *arglist)
  proto = @cp.toplevel.symbol[fname]
  raise "need prototype for #{fname}!" unless proto
  wrapped = arglist.map do |arg|
    case arg
    when Integer, String then [arg]
    else arg
    end
  end
  C::CExpression[proto, :funcall, wrapped]
end
600
+
601
# The 'VALUE' symbol, as registered in the toplevel scope of the C parser.
def value
  toplevel = @cp.toplevel
  toplevel.symbol['VALUE']
end
605
+
606
# Declare a new VALUE variable in the function scope (@scope): registers the
# symbol and appends the C declaration statement.
# No initializer is set when initializer == :none.
# Returns the new C::Variable.
def declare_newvar(name, initializer)
  var = C::Variable.new(name, value)
  if initializer != :none
    var.initializer = initializer
  end
  @scope.symbol[var.name] = var
  @scope.statements << C::Declaration.new(var)
  var
end
615
+
616
# Return a string suitable for use as a C variable name:
# every character that is not a word character (\w) is replaced
# by the hexadecimal encoding of its bytes.
def escape_varname(n)
  n.gsub(/\W/) { |ch| ch.unpack('H*').first }
end
621
+
622
# Retrieve the variable called name (after escaping) from the function scope,
# declaring it first if it does not exist yet.
# Pass :none to avoid the initializer; a nil/false initializer means rb_nil.
def get_var(name, initializer=:none)
  cname = escape_varname(name)
  known = @scope.symbol[cname]
  return known if known
  @scope.symbol[cname] = declare_newvar(cname, initializer || rb_nil)
end
628
+
629
# Create a new temporary variable named tmp_<base>_<counter>.
# When var is already a C::Variable it is returned as is and nothing is created.
# XXX put_var ?
def get_new_tmp_var(base=nil, var=nil)
  return var if var.kind_of?(C::Variable)
  @tmp_var_id = (@tmp_var_id || 0) + 1
  prefix = base ? "#{base}_" : ''
  get_var("tmp_#{prefix}#{@tmp_var_id}")
end
636
+
637
# Retrieve/create the C variable local_<n> holding a ruby local variable,
# with an optional initializer (see get_var).
def local(n, init=nil)
  cname = "local_#{n}"
  get_var(cname, init)
end
641
+
642
# Retrieve/create the C variable dvar_<n> holding a dynamic variable
# (block argument/variable).
# Pass :none to avoid the initializer (see get_var).
def dvar(n, init=nil)
  cname = "dvar_#{n}"
  get_var(cname, init)
end
647
+
648
# Retrieve the 'self' symbol of the function scope (1st func arg).
def rb_self
  symbols = @scope.symbol
  symbols['self']
end
652
+
653
# Returns a CExpr casting expr to a VALUE* and indexing it: ((VALUE*)expr)[idx]
def rb_cast_pvalue(expr, idx)
  pvalue = C::Pointer.new(value)
  C::CExpression[[[expr], pvalue], :'[]', [idx]]
end
657
+
658
# Retrieve the current class, from self->klass (read as ((VALUE*)self)[1]).
# XXX will segfault with self.kind_of? Fixnum/true/false/nil/sym
def rb_selfclass
  receiver = rb_self
  rb_cast_pvalue(receiver, 1)
end
663
+
664
# CExpr for nil: the object_id of nil in the current interpreter, cast to VALUE.
def rb_nil
  nil_id = nil.object_id
  C::CExpression[[nil_id], value]
end
667
# CExpr for true: the object_id of true in the current interpreter, cast to VALUE.
def rb_true
  true_id = true.object_id
  C::CExpression[[true_id], value]
end
670
# CExpr for false: the object_id of false in the current interpreter, cast to VALUE.
def rb_false
  false_id = false.object_id
  C::CExpression[[false_id], value]
end
673
+
674
# Compile-time equivalent of calling rb_intern on a string:
# the CExpr holds the symbol id as seen by the current interpreter.
def rb_intern(n)
  sym = n.to_sym
  C::CExpression[sym.to_i]
end
679
+
680
# Create a rb_funcall construct: rb_funcall(recv, <id of meth>, argc, args...)
def rb_funcall(recv, meth, *args)
  argc = args.length
  fcall('rb_funcall', recv, rb_intern(meth), argc, *args)
end
684
+
685
# Ruby boolean test of an expression: returns a CExpr that is true unless
# expr is false or nil.
# When nil or false has an object_id of 0, a single masked comparison is
# enough; otherwise expr is assigned to a temporary var (a statement is
# appended to scope) and compared against both nil and false.
def rb_test(expr, scope)
  nil_id = nil.object_id
  false_id = false.object_id
  if nil_id == 0 || false_id == 0 # just to be sure
    mask = nil_id | false_id
    return C::CExpression[[expr, :|, mask], :'!=', mask]
  end

  tmp = expr
  unless tmp.kind_of?(C::Variable)
    tmp = get_new_tmp_var('test')
    scope.statements << C::CExpression[tmp, :'=', expr]
  end
  C::CExpression[[tmp, :'!=', rb_nil], :'&&', [tmp, :'!=', rb_false]]
end
701
+
702
# Generate the C call raising an exception with message reason.
# cls is the name of the toplevel C symbol holding the exception class
# (a RuntimeError by default).
def rb_raise(reason, cls='rb_eRuntimeError')
  exc_class = rb_global(cls)
  fcall('rb_raise', exc_class, reason)
end
706
+
707
# Return a C expr equivalent to TYPE(expr) == type for non-immediate types:
#   expr > 7 && (expr & 3) == 0 && (((VALUE*)expr)[0] & 0x3f) == type
# XXX expr evaluated 3 times
def rb_test_class_type(expr, type)
  is_pointer = [[expr, :>, [7]], :'&&', [[expr, :&, [3]], :==, [0]]]
  type_match = [[rb_cast_pvalue(expr, 0), :&, [0x3f]], :'==', [type]]
  C::CExpression[is_pointer, :'&&', type_match]
end
712
+
713
# Return a C expr equivalent to TYPE(expr) == T_ARRAY
def rb_test_class_ary(expr)
  t_array = 9
  rb_test_class_type(expr, t_array)
end
717
# ARY_PTR(expr): ((VALUE*)expr)[4] cast to a VALUE*.
# With idx, returns the expression indexing that pointer instead.
def rb_ary_ptr(expr, idx=nil)
  base = C::CExpression[[rb_cast_pvalue(expr, 4)], C::Pointer.new(value)]
  return base unless idx
  C::CExpression[base, :'[]', [idx]]
end
722
# ARY_LEN(expr): ((VALUE*)expr)[2]
def rb_ary_len(expr)
  len_index = 2
  rb_cast_pvalue(expr, len_index)
end
726
+
727
# Return a C expr equivalent to TYPE(expr) == T_STRING
def rb_test_class_string(expr)
  t_string = 7
  rb_test_class_type(expr, t_string)
end
731
# STR_PTR(expr): ((VALUE*)expr)[3] cast to a char*.
# With idx, returns the expression indexing that pointer instead.
def rb_str_ptr(expr, idx=nil)
  base = C::CExpression[[rb_cast_pvalue(expr, 3)], C::Pointer.new(C::BaseType.new(:char))]
  return base unless idx
  C::CExpression[base, :'[]', [idx]]
end
736
# STR_LEN(expr): ((VALUE*)expr)[2]
def rb_str_len(expr)
  len_index = 2
  rb_cast_pvalue(expr, len_index)
end
740
+
741
# Return a C expr testing the type of expr against 0xb
# (presumably T_HASH, by analogy with the array/string tests - to confirm)
def rb_test_class_hash(expr)
  type_id = 0xb
  rb_test_class_type(expr, type_id)
end
744
+
745
# Returns a static pointer to the constant: the constant is resolved now, in
# the compiling interpreter, and its VALUE is embedded in the generated code.
# owner is the module holding the constant (looked up when not given).
# Raises Fail when no owner is found.
def rb_const(constname, owner = resolve_const_owner(constname))
  unless owner
    raise Fail, "no dynamic constant resolution #{constname}"
  end
  resolved = owner.const_get(constname)
  address = RubyHack.rb_obj_to_value(resolved)
  C::CExpression[[address], value]
end
751
+
752
# Compile a :masgn (parallel assignment) node.
#
# ast[1] is the [:array, ...] of lhs nodes, ast[2] the rhs, ast[3] the
# optional splat lhs ('*rest').
# When the value is not needed and both sides are arrays of the same length
# without splat, the work is delegated to rb_masgn_optimized.
# Otherwise the rhs is stored in a temporary and each lhs is assigned from
# the matching array slot.
#
# Returns the variable holding the rhs (nil.object_id for the optimized form).
# Raises Fail on an unsupported node shape.
def rb_masgn(ast, scope, want_value)
  raise Fail, "masgn with no rhs #{ast.inspect}" if not ast[2]
  raise Fail, "masgn with no lhs array #{ast.inspect}" if not ast[1] or ast[1][0] != :array
  if not want_value and ast[2][0] == :array and not ast[3] and ast[2].length == ast[1].length
    rb_masgn_optimized(ast, scope)
    return nil.object_id
  end
  full = get_new_tmp_var('masgn', want_value)
  ary = ast_to_c(ast[2], scope, full)
  scope.statements << C::CExpression[full, :'=', ary] if full != ary
  ast[1][1..-1].each_with_index { |e, i|
    raise Fail, "weird masgn lhs #{e.inspect} in #{ast.inspect}" if e[-1] != nil
    # local_42 = full[i]
    e = e.dup
    e[-1] = [:rb2cstmt, rb_ary_ptr(full, i)]
    ast_to_c(e, scope, false)
  }
  if ast[3]
    # report the splat node itself: no other variable describes it at this point
    raise Fail, "weird masgn lhs #{ast[3].inspect} in #{ast.inspect}" if ast[3][-1] != nil
    # local_28 = full[12..-1].to_a
    e = ast[3].dup
    e[-1] = [:call, [:call, [:rb2cvar, full.name], '[]', [:array, [:dot2, [:lit, ast[1].length-1], [:lit, -1]]]], 'to_a']
    ast_to_c(e, scope, false)
  end

  full
end
780
+
781
# Retrieve the symbol named cname from the toplevel scope of the C parser.
def rb_global(cname)
  toplevel = @cp.toplevel
  toplevel.symbol[cname]
end
784
+
785
# Compile an optimized :masgn with rhs.length == lhs.length (no need of a ruby array).
# Every rhs is first evaluated into its own temporary, then each lhs is
# assigned from the matching temporary.
# Raises Fail on an lhs node that already carries a value.
def rb_masgn_optimized(ast, scope)
  temps = ast[2][1..-1].map do |rhs|
    tmp = get_new_tmp_var('masgn_opt')
    res = ast_to_c(rhs, scope, tmp)
    scope.statements << C::CExpression[tmp, :'=', res] if tmp != res
    tmp
  end
  ast[1][1..-1].each_with_index do |lhs, i|
    tmp = temps[i]
    target = lhs.dup
    raise Fail, "weird masgn lhs #{target.inspect} in #{ast.inspect}" if target[-1] != nil
    target[-1] = [:rb2cvar, tmp.name]
    ast_to_c(target, scope, false)
  end
end
802
+
803
# The recursive AST to C compiler.
# May append C statements to scope.
# Returns the C::CExpr holding the VALUE of the current ruby statement
# (nil when want_value is false, except on the early returns of the
# :iter/:call optimizations).
# want_value is an optional hint telling whether the returned VALUE is needed.
# If want_value is a C::Variable, the statements should try to populate this
# var instead of some random tmp var,
# eg to simplify :if encoding unless we have 'foo = if 42;..'
# Raises Fail on an unsupported node.
def ast_to_c(ast, scope, want_value = true)
  ret =
    case ast.to_a[0]
    when :block
      if ast[1]
        ast[1..-2].each { |a| ast_to_c(a, scope, false) }
        ast_to_c(ast.last, scope, want_value)
      end

    when :lvar
      local(ast[1])
    when :lasgn
      if scope == @scope
        l = local(ast[1], :none)
      else
        # w = 4 if false ; p w => should be nil
        l = local(ast[1])
      end
      st = ast_to_c(ast[2], scope, l)
      scope.statements << C::CExpression[l, :'=', st] if st != l
      l
    when :dvar
      dvar(ast[1])
    when :dasgn_curr
      l = dvar(ast[1])
      st = ast_to_c(ast[2], scope, l)
      scope.statements << C::CExpression[l, :'=', st] if st != l
      l
    when :ivar
      fcall('rb_ivar_get', rb_self, rb_intern(ast[1]))
    when :iasgn
      if want_value
        tmp = get_new_tmp_var("ivar_#{ast[1]}", want_value)
        scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)]
        scope.statements << fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), tmp)
        tmp
      else
        scope.statements << fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), ast_to_c(ast[2], scope))
      end
    when :cvar
      fcall('rb_cvar_get', rb_selfclass, rb_intern(ast[1]))
    when :cvasgn
      if want_value
        tmp = get_new_tmp_var("cvar_#{ast[1]}", want_value)
        scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)]
        scope.statements << fcall('rb_cvar_set', rb_selfclass, rb_intern(ast[1]), tmp, rb_false)
        tmp
      else
        scope.statements << fcall('rb_cvar_set', rb_selfclass, rb_intern(ast[1]), ast_to_c(ast[2], scope), rb_false)
      end
    when :gvar
      fcall('rb_gv_get', ast[1])
    when :gasgn
      if want_value
        tmp = get_new_tmp_var("gvar_#{ast[1]}", want_value)
        scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)]
        scope.statements << fcall('rb_gv_set', ast[1], tmp)
        tmp
      else
        scope.statements << fcall('rb_gv_set', ast[1], ast_to_c(ast[2], scope))
      end
    when :attrasgn # foo.bar= 42 (same as :call, except for return value)
      recv = ast_to_c(ast[1], scope)
      raise Fail, "unsupported #{ast.inspect}" if not ast[3] or ast[3][0] != :array
      if ast[3].length != 2
        if ast[2] != '[]=' or ast[3].length != 3
          raise Fail, "unsupported #{ast.inspect}"
        end
        # foo[4] = 2
        idx = ast_to_c(ast[3][1], scope)
      end
      arg = ast_to_c(ast[3].last, scope)
      if want_value
        # the value of the statement is the assigned value, not the method return
        tmp = get_new_tmp_var('call', want_value)
        scope.statements << C::CExpression[tmp, :'=', arg]
      end
      if idx
        scope.statements << rb_funcall(recv, ast[2], idx, arg)
      else
        scope.statements << rb_funcall(recv, ast[2], arg)
      end
      tmp

    when :rb2cvar # hax, used in vararg parsing
      get_var(ast[1])
    when :rb2cstmt
      ast[1]

    when :block_arg
      local(ast[3], fcall('rb_block_proc'))

    when :lit
      case ast[1]
      when Symbol
        # XXX ID2SYM
        C::CExpression[[rb_intern(ast[1].to_s), :<<, 8], :|, 0xe]
      when Range
        # 3rd arg of rb_range_new is the exclude_end flag (same convention as :dot2/:dot3 below)
        fcall('rb_range_new', ast[1].begin.object_id, ast[1].end.object_id, ast[1].exclude_end? ? 1 : 0)
      else # true/false/nil/fixnum
        ast[1].object_id
      end
    when :self
      rb_self
    when :str
      fcall('rb_str_new2', ast[1])
    when :array
      tmp = get_new_tmp_var('ary', want_value)
      scope.statements << C::CExpression[tmp, :'=', fcall('rb_ary_new')]
      ast[1..-1].each { |e|
        scope.statements << fcall('rb_ary_push', tmp, ast_to_c(e, scope))
      }
      tmp
    when :hash
      raise Fail, "bad #{ast.inspect}" if ast[1][0] != :array
      tmp = get_new_tmp_var('hash', want_value)
      scope.statements << C::CExpression[tmp, :'=', fcall('rb_hash_new')]
      # the array alternates keys and values
      ki = nil
      ast[1][1..-1].each { |k|
        if not ki
          ki = k
        else
          scope.statements << fcall('rb_hash_aset', tmp, ast_to_c(ki, scope), ast_to_c(k, scope))
          ki = nil
        end
      }
      tmp

    when :iter
      if v = optimize_iter(ast, scope, want_value)
        return v
      end
      # for full support of :iter, we need access to the interpreter's ruby_block private global variable in eval.c
      # we can find it by analysing rb_block_given_p, but this won't work with a static precompiled rubyhack...
      # even with access to ruby_block, there we would need to redo PUSH_BLOCK, create a temporary dvar list,
      # handle [:break, lol], and do all the stack magic reused in rb_yield (probably incl setjmp etc)
      raise Fail, "unsupported iter #{ast[3].inspect} { | #{ast[1].inspect} | #{ast[2].inspect} }"

    when :call, :vcall, :fcall
      if v = optimize_call(ast, scope, want_value)
        return v
      end
      recv = ((ast[0] == :call) ? ast_to_c(ast[1], scope) : rb_self)
      if not ast[3]
        f = rb_funcall(recv, ast[2])
      elsif ast[3][0] == :array
        args = ast[3][1..-1].map { |a| ast_to_c(a, scope) }
        f = rb_funcall(recv, ast[2], *args)
      elsif ast[3][0] == :splat
        args = ast_to_c(ast[3], scope)
        if not args.kind_of? C::Variable
          tmp = get_new_tmp_var('args', want_value)
          scope.statements << C::CExpression[tmp, :'=', args]
          args = tmp
        end
        f = fcall('rb_funcall3', recv, rb_intern(ast[2]), rb_ary_len(args), rb_ary_ptr(args))
      # elsif ast[3][0] == :argscat
      else
        raise Fail, "unsupported #{ast.inspect}"
      end
      if want_value
        tmp ||= get_new_tmp_var('call', want_value)
        scope.statements << C::CExpression[tmp, :'=', f]
        tmp
      else
        scope.statements << f
        f
      end

    when :if, :when
      if ast[0] == :when and ast[1][0] == :array
        cnd = nil
        ast[1][1..-1].map { |cd| rb_test(ast_to_c(cd, scope), scope) }.each { |cd|
          cnd = (cnd ? C::CExpression[cnd, :'||', cd] : cd)
        }
      else
        cnd = rb_test(ast_to_c(ast[1], scope), scope)
      end

      tbdy = C::Block.new(scope)
      ebdy = C::Block.new(scope) if ast[3] or want_value

      if want_value
        tmp = get_new_tmp_var('if', want_value)
        thn = ast_to_c(ast[2], tbdy, tmp)
        tbdy.statements << C::CExpression[tmp, :'=', thn] if tmp != thn
        if ast[3]
          els = ast_to_c(ast[3], ebdy, tmp)
        else
          # foo = if bar ; baz ; end => nil if !bar
          els = rb_nil
        end
        ebdy.statements << C::CExpression[tmp, :'=', els] if tmp != els
      else
        ast_to_c(ast[2], tbdy, false)
        ast_to_c(ast[3], ebdy, false)
      end

      scope.statements << C::If.new(cnd, tbdy, ebdy)

      tmp

    when :while, :until
      pib = @iter_break
      @iter_break = nil # XXX foo = while ()...

      # compiled as an infinite for(;;) with an explicit break on the condition
      body = C::Block.new(scope)
      if ast[3] == 0 # do .. while();
        ast_to_c(ast[2], body, false)
      end
      t = nil
      e = C::Break.new
      t, e = e, t if ast[0] == :until
      body.statements << C::If.new(rb_test(ast_to_c(ast[1], body), body), t, e)
      if ast[3] != 0 # do .. while();
        ast_to_c(ast[2], body, false)
      end
      scope.statements << C::For.new(nil, nil, nil, body)

      @iter_break = pib
      nil.object_id

    when :and, :or, :not
      # beware lazy evaluation !
      tmp = get_new_tmp_var('and', want_value)
      v1 = ast_to_c(ast[1], scope, tmp)
      # and/or need that tmp has the actual v1 value (returned when shortcircuit)
      scope.statements << C::CExpression[tmp, :'=', v1] if v1 != tmp
      v1 = tmp
      case ast[0]
      when :and
        t = C::Block.new(scope)
        v2 = ast_to_c(ast[2], t, tmp)
        t.statements << C::CExpression[tmp, :'=', v2] if v2 != tmp
      when :or
        e = C::Block.new(scope)
        v2 = ast_to_c(ast[2], e, tmp)
        e.statements << C::CExpression[tmp, :'=', v2] if v2 != tmp
      when :not
        t = C::CExpression[tmp, :'=', rb_false]
        e = C::CExpression[tmp, :'=', rb_true]
      end
      scope.statements << C::If.new(rb_test(v1, scope), t, e)
      tmp
    when :return
      scope.statements << C::Return.new(ast_to_c(ast[1], scope))
      nil.object_id
    when :break
      if @iter_break
        v = (ast[1] ? ast_to_c(ast[1], scope, @iter_break) : nil.object_id)
        scope.statements << C::CExpression[@iter_break, :'=', [[v], value]] if @iter_break != v
      end
      scope.statements << C::Break.new
      nil.object_id

    when nil, :args
      nil.object_id
    when :nil
      rb_nil
    when :false
      rb_false
    when :true
      rb_true
    when :const
      rb_const(ast[1])
    when :colon2
      if cst = check_const(ast[1])
        rb_const(ast[2], cst)
      else
        fcall('rb_const_get', ast_to_c(ast[1], scope), rb_intern(ast[2]))
      end
    when :colon3
      rb_const(ast[1], ::Object)
    when :defined
      case ast[1][0]
      when :ivar
        fcall('rb_ivar_defined', rb_self, rb_intern(ast[1][1]))
      else
        raise Fail, "unsupported #{ast.inspect}"
      end
    when :masgn
      # parallel assignment: put everything in an Array, then pop everything back?
      rb_masgn(ast, scope, want_value)

    when :evstr
      fcall('rb_obj_as_string', ast_to_c(ast[1], scope))
    when :dot2, :dot3
      fcall('rb_range_new', ast_to_c(ast[1], scope), ast_to_c(ast[2], scope), ast[0] == :dot2 ? 0 : 1)
    when :splat
      fcall('rb_Array', ast_to_c(ast[1], scope))
    when :to_ary
      fcall('rb_ary_to_ary', ast_to_c(ast[1], scope))
    when :dstr
      # dynamic string: "foo#{bar}baz"
      tmp = get_new_tmp_var('dstr')
      scope.statements << C::CExpression[tmp, :'=', fcall('rb_str_new2', ast[1][1])]
      ast[2..-1].compact.each { |s|
        if s[0] == :str # directly append the char*
          scope.statements << fcall('rb_str_cat2', tmp, s[1])
        else
          scope.statements << fcall('rb_str_append', tmp, ast_to_c(s, scope))
        end
      }
      tmp
    when :case
      compile_case(ast, scope, want_value)
    when :ensure
      # TODO
      ret = ast_to_c(ast[1], scope, want_value)
      ast_to_c(ast[3], scope, false)
      ret
    else
      raise Fail, "unsupported #{ast.inspect}"
    end

  if want_value
    # raw integers/strings are cast to VALUE
    ret = C::CExpression[[ret], value] if ret.kind_of? Integer or ret.kind_of? String
    ret
  end
end
1128
+
1129
# Optional optimization of a call (eg a == 1, c+2, ...):
# generates inline C for a few well-known methods (comparison/addition with a
# Fixnum literal, Symbol ==, <<, [], empty?, pop, kind_of?) and falls back to
# optimize_call_static for calls on self.
# Returns nil to request a normal rb_funcall, or a C::CExpr to use as retval.
def optimize_call(ast, scope, want_value)
  ce = C::CExpression
  op = ast[2]
  int = C::BaseType.new(:ptr) # signed VALUE
  args = ast[3][1..-1] if ast[3] and ast[3][0] == :array
  arg0 = args[0] if args and args[0]

  # evaluate the receiver, make sure it ends up in a variable
  # returns [result var, receiver var]
  eval_recv = lambda {
    res_var = get_new_tmp_var('opt', want_value)
    recv_var = ast_to_c(ast[1], scope, res_var)
    if not recv_var.kind_of?(C::Variable)
      scope.statements << ce[res_var, :'=', recv_var]
      recv_var = res_var
    end
    [res_var, recv_var]
  }

  if arg0 and arg0[0] == :lit and arg0[1].kind_of?(Fixnum)
    # optimize 'x==42', 'x+42', 'x-42'
    o2 = arg0[1]
    return if not %w[== > < >= <= + -].include?(op)
    if o2 < 0 and ['+', '-'].include?(op)
      # need o2 >= 0 for overflow detection
      op = {'+' => '-', '-' => '+'}[op]
      o2 = -o2
      return if not o2.kind_of?(Fixnum) # -0x40000000
    end

    int_v = o2.object_id
    recv = ast_to_c(ast[1], scope)
    tmp = get_new_tmp_var('opt', want_value)
    if not recv.kind_of?(C::Variable)
      scope.statements << ce[tmp, :'=', recv]
      recv = tmp
    end

    case op
    when '=='
      # XXX assume == only return true for full equality: if not Fixnum, then always false
      # which breaks 1.0 == 1 and maybe others, but its ok
      scope.statements << C::If.new(ce[recv, :'==', [int_v]], ce[tmp, :'=', rb_true], ce[tmp, :'=', rb_false])
    when '>', '<', '>=', '<='
      # do the actual comparison on signed >>1 if both Fixnum
      t = C::If.new(
        ce[[[[recv], int], :>>, [1]], op.to_sym, [[[int_v], int], :>>, [1]]],
        ce[tmp, :'=', rb_true],
        ce[tmp, :'=', rb_false])
      # fallback to actual rb_funcall
      e = ce[tmp, :'=', rb_funcall(recv, op, o2.object_id)]
      add_optimized_statement(scope, ast[1], recv, 'fixnum' => t, 'other' => e)
    when '+'
      e = ce[recv, :+, [int_v-1]] # overflow to Bignum ?
      cnd = ce[[recv, :&, [1]], :'&&', [[[recv], int], :<, [[e], int]]]
      t = ce[tmp, :'=', e]
      e = ce[tmp, :'=', rb_funcall(recv, op, o2.object_id)]
      if @optim_hint[ast[1]] == 'fixnum'
        # add_optimized_statement wont handle the overflow check correctly
        scope.statements << t
      else
        scope.statements << C::If.new(cnd, t, e)
      end
    when '-'
      e = ce[recv, :-, [int_v-1]]
      cnd = ce[[recv, :&, [1]], :'&&', [[[recv], int], :>, [[e], int]]]
      t = ce[tmp, :'=', e]
      e = ce[tmp, :'=', rb_funcall(recv, op, o2.object_id)]
      if @optim_hint[ast[1]] == 'fixnum'
        scope.statements << t
      else
        scope.statements << C::If.new(cnd, t, e)
      end
    end
    tmp

  elsif arg0 and arg0[0] == :lit and arg0[1].kind_of?(Symbol) and op == '=='
    # Symbol#==
    s_v = ast_to_c(arg0, scope)
    tmp, recv = eval_recv.call

    scope.statements << C::If.new(ce[recv, :'==', [s_v]], ce[tmp, :'=', rb_true], ce[tmp, :'=', rb_false])
    tmp

  elsif arg0 and op == '<<'
    tmp = get_new_tmp_var('opt', want_value)
    recv = ast_to_c(ast[1], scope, tmp)
    arg = ast_to_c(arg0, scope)
    if recv != tmp
      scope.statements << ce[tmp, :'=', recv]
      recv = tmp
    end

    ar = fcall('rb_ary_push', recv, arg)
    st = fcall('rb_str_concat', recv, arg)
    oth = rb_funcall(recv, op, arg)
    oth = ce[tmp, :'=', oth] if want_value

    add_optimized_statement(scope, ast[1], recv, 'ary' => ar, 'string' => st, 'other' => oth)
    tmp

  elsif arg0 and args.length == 1 and op == '[]'
    return if ast[1][0] == :const # Expression[42]
    tmp, recv = eval_recv.call

    idx = get_new_tmp_var('idx')
    arg = ast_to_c(arg0, scope, idx)
    if not arg.kind_of?(C::Variable)
      scope.statements << ce[idx, :'=', arg]
      arg = idx
    end
    idx = ce[[idx], int]

    # array: idx = arg >> 1, wrap negative index around, bound check
    ar = C::Block.new(scope)
    ar.statements << ce[idx, :'=', [[[arg], int], :>>, [1]]]
    ar.statements << C::If.new(ce[idx, :<, [0]], ce[idx, :'=', [idx, :+, rb_ary_len(recv)]], nil)
    ar.statements << C::If.new(ce[[idx, :<, [0]], :'||', [idx, :>=, [[rb_ary_len(recv)], int]]],
      ce[tmp, :'=', rb_nil],
      ce[tmp, :'=', rb_ary_ptr(recv, idx)])
    # string: same, the result is the byte turned into a Fixnum
    st = C::Block.new(scope)
    st.statements << ce[idx, :'=', [[[arg], int], :>>, [1]]]
    st.statements << C::If.new(ce[idx, :<, [0]], ce[idx, :'=', [idx, :+, rb_str_len(recv)]], nil)
    st.statements << C::If.new(ce[[idx, :<, [0]], :'||', [idx, :>=, [[rb_str_len(recv)], int]]],
      ce[tmp, :'=', rb_nil],
      ce[tmp, :'=', [[[[rb_str_ptr(recv, idx), :&, [0xff]], :<<, [1]], :|, [1]], value]])
    hsh = ce[tmp, :'=', fcall('rb_hash_aref', recv, arg)]
    oth = ce[tmp, :'=', rb_funcall(recv, op, arg)]

    # ary/string only valid with fixnum argument !
    add_optimized_statement(scope, ast[1], recv, 'hash' => hsh, 'other' => oth,
      'ary_bnd' => ce[tmp, :'=', rb_ary_ptr(recv, ce[[[arg], int], :>>, [1]])],
      ce[[arg, :&, 1], :'&&', rb_test_class_ary(recv)] => ar,
      ce[[arg, :&, 1], :'&&', rb_test_class_string(recv)] => st)
    tmp

  elsif ast[1] and not arg0 and op == 'empty?'
    tmp, recv = eval_recv.call

    ar = C::If.new(rb_ary_len(recv), ce[tmp, :'=', rb_false], ce[tmp, :'=', rb_true])

    add_optimized_statement(scope, ast[1], recv, 'ary' => ar,
      'other' => ce[tmp, :'=', rb_funcall(recv, op)])
    tmp

  elsif ast[1] and not arg0 and op == 'pop'
    tmp, recv = eval_recv.call

    t = fcall('rb_ary_pop', recv)
    e = rb_funcall(recv, op)
    if want_value
      t = ce[tmp, :'=', t]
      e = ce[tmp, :'=', e]
    end

    add_optimized_statement(scope, ast[1], recv, 'ary' => t, 'other' => e)

    tmp

  elsif ast[1] and op == 'kind_of?' and arg0 and (arg0[0] == :const or arg0[0] == :colon3)
    # TODO check const maps to toplevel when :const
    test =
      case arg0[1]
      when 'Symbol'
        tmp = get_new_tmp_var('kindof', want_value)
        ce[[ast_to_c(ast[1], scope, tmp), :'&', [0xf]], :'==', [0xe]]
      #when 'Numeric', 'Integer'
      when 'Fixnum'
        tmp = get_new_tmp_var('kindof', want_value)
        ce[ast_to_c(ast[1], scope, tmp), :'&', [0x1]]
      when 'Array'
        rb_test_class_ary(ast_to_c(ast[1], scope))
      when 'String'
        rb_test_class_string(ast_to_c(ast[1], scope))
      else return
      end
    puts "shortcut may be incorrect for #{ast.inspect}" if arg0[0] == :const
    tmp ||= get_new_tmp_var('kindof', want_value)
    scope.statements << C::If.new(test, ce[tmp, :'=', rb_true], ce[tmp, :'=', rb_false])
    tmp

  elsif not ast[1] or ast[1] == [:self]
    optimize_call_static(ast, scope, want_value)
  end
end
1323
+
1324
# Check if the var falls in an optim_hint, if so generate only the selected code.
# optim is a hash varclass (key of @optim_hint) => c_stmt
# An optim key can also be a C::Statement that is used in the If clause.
# If optim['ary'] == optim['ary_bnd'], you can omit the latter.
# optim must have an 'other' key, holding the call to the generic ruby method.
#
# Without hint, and when all the keys are Strings, a cascade of type tests is
# generated (the 'fixnum' test coming first); in every other case where the
# hint selects nothing, only the 'other' statement is emitted.
# The resulting statement is appended to scope.
def add_optimized_statement(scope, varid, varc, optim={})
  hint = @optim_hint[varid]
  hint = 'ary' if hint == 'ary_bnd' and not optim['ary_bnd']
  chosen = optim[hint]
  unless chosen
    chosen = optim['other']
    if not hint and optim.keys.all? { |k| k.kind_of?(String) }
      # no need to cascade if we have a hash and can optim ary only
      optim.each { |key, stmt|
        cond =
          case key
          when 'ary' then rb_test_class_ary(varc)
          when 'hash' then rb_test_class_hash(varc)
          when 'string' then rb_test_class_string(varc)
          when 'other' then nil # already done as default case
          when 'fixnum' then nil # add test last
          when C::Statement then key
          end
        chosen = C::If.new(cond, stmt, chosen) if cond
      }
      if fixnum_stmt = optim['fixnum']
        # first test to perform (fast path)
        chosen = C::If.new(C::CExpression[varc, :&, 1], fixnum_stmt, chosen)
      end
    end
  end
  scope.statements << chosen
end
1354
+
1355
# Return a C expression pointing to the C function implementing klass#method
# (or klass.method when +singleton+ is true).
#
# The method node is read from the running interpreter through RubyHack.
# Returns nil when no node is found (pointer == 0) or when the node type is
# not :cfunc.
# The returned expression is the raw function address cast to a pointer to a
# function whose prototype is derived from the arity stored in the node:
# * -1      => (int, VALUE*, VALUE)
# * -2      => (VALUE, VALUE)
# * n >= 0  => n+1 VALUE arguments
def get_cfuncptr(klass, method, singleton=false)
  owner = klass
  owner = (class << klass ; self ; end) if singleton

  node_ptr = RubyHack.get_method_node_ptr(owner, method)
  return if node_ptr == 0

  # the node type is stored in bits 11..18 of the first int of the node
  node_type = RubyHack::NODETYPE[(RubyHack.memory_read_int(node_ptr) >> 11) & 0xff]
  return unless node_type == :cfunc

  info = RubyHack.read_node(node_ptr)[1]
  arity = info[:arity]
  addr = info[:fptr]

  proto = C::Function.new(value, [])
  arg_types =
    case arity
    when -1; [C::BaseType.new(:int), C::Pointer.new(value), value]
    when -2; [value, value]
    else Array.new(arity+1) { value }
    end
  arg_types.each { |type| proto.args << C::Variable.new(nil, type) }

  C::CExpression[[addr], C::Pointer.new(proto)]
end
1376
+
1377
# Compile a receiver-less method call (+ast+) into a direct call to the C
# function implementing it.
# Private function calls are assumed not to be virtual and are hardlinked here.
#
# ast::        call node; ast[2] is the method name, ast[3] the argument node
#              (nil, an :array node or a :splat node)
# scope::      C block receiving the generated statements
# want_value:: when true, the call result is stored in a new tmp var which is returned
#
# Returns nil (nothing generated for the call itself) when the arity cannot be
# found, when no C function pointer is available for the target, or when the
# argument node is of an unhandled kind.
# Otherwise returns the result variable (want_value) or scope.statements.
def optimize_call_static(ast, scope, want_value)
  begin
    arity = method_arity(ast[2])
  rescue
    return
  end

  if ast[2].to_s == @meth.to_s
    # recursive call to the method currently being compiled
    target = @cur_cfunc
  else
    target = get_cfuncptr(@klass, ast[2], @meth_singleton)
    return unless target
  end

  call_args = []
  arg_node = ast[3]
  # set once call_args is complete (splat case), so the generic marshalling is skipped
  call_args_done = false

  if not arg_node
    rb_args = []
  elsif arg_node[0] == :array
    rb_args = arg_node[1..-1]
  elsif arg_node[0] == :splat
    # the arguments come as one ruby Array, known only at runtime
    packed = ast_to_c(arg_node, scope)
    if arity != -2 and not packed.kind_of?(C::Variable)
      holder = get_new_tmp_var('arg')
      scope.statements << C::CExpression[holder, :'=', packed]
      packed = holder
    end

    case arity
    when -2
      # f(self, args_ary)
      call_args << rb_self << packed
    when -1
      # f(argc, argv, self)
      call_args << [rb_ary_len(packed)] << rb_ary_ptr(packed) << rb_self
    else
      # f(self, arg0, arg1...): the array length must match at runtime
      mismatch = C::CExpression[rb_ary_len(packed), :'!=', [arity]]
      scope.statements << C::If.new(mismatch, rb_raise("#{arity} args expected", 'rb_eArgumentError'), nil)

      call_args << rb_self
      arity.times { |idx| call_args << rb_ary_ptr(packed, idx) }
    end
    call_args_done = true
  else
    return # TODO
  end

  unless call_args_done
    case arity
    when -2
      # f(self, args_ary): build the ruby Array holding the arguments
      packed = get_new_tmp_var('arg')
      scope.statements << C::CExpression[packed, :'=', fcall('rb_ary_new')]
      rb_args.each { |node|
        scope.statements << fcall('rb_ary_push', packed, ast_to_c(node, scope))
      }
      call_args << rb_self << packed

    when -1
      # f(argc, argv, self): build a C array holding the arguments
      case rb_args.length
      when 0
        argv = C::CExpression[[0], C::Pointer.new(value)]
      when 1
        # a single argument: pass the address of the variable holding it
        single = ast_to_c(rb_args[0], scope)
        if not single.kind_of?(C::Variable)
          argv = get_new_tmp_var('argv')
          scope.statements << C::CExpression[argv, :'=', single]
          single = argv
        end
        argv = C::CExpression[:'&', single]
      else
        argv = get_new_tmp_var('argv')
        argv.type = C::Array.new(value, rb_args.length)
        rb_args.each_with_index { |node, idx|
          elem = ast_to_c(node, scope)
          scope.statements << C::CExpression[[argv, :'[]', [idx]], :'=', elem]
        }
      end
      call_args << [rb_args.length] << argv << rb_self

    else
      # f(self, arg0, arg1...): one tmp var per argument
      call_args << rb_self
      rb_args.each { |node|
        slot = get_new_tmp_var('arg')
        computed = ast_to_c(node, scope, slot)
        scope.statements << C::CExpression[slot, :'=', computed] if computed != slot
        call_args << slot
      }
    end
  end

  invocation = C::CExpression[target, :funcall, call_args]
  if want_value
    result = get_new_tmp_var('ccall', want_value)
    scope.statements << C::CExpression[result, :'=', invocation]
    result
  else
    scope.statements << invocation
  end
end
1469
+
1470
# Compile some well-known block iterators into plain C loops.
#
# ast::        iter node; ast[1] is the block argument node, ast[2] the block
#              body, ast[3] the node of the iterator call itself
# scope::      C block receiving the generated statements
# want_value:: when true, a tmp var receives the value of the whole expression
#
# Handled forms:
# * loop { }
# * int.times { |i| }            (the receiver must be a Fixnum at runtime)
# * ary.each / ary.find / ary.map { |e| }   (the receiver must be an Array at runtime)
# * x.reverse_each and x.each_key, rewritten to x.reverse.each and x.keys.each
#
# Returns nil if the iterator is not handled (nothing is generated for the
# loop itself). Otherwise returns the tmp var holding the result when
# +want_value+ is set, or nil.object_id.
#
# While the loop is compiled, @iter_break holds the result tmp var (nil when
# no value is wanted); the previous value is restored on exit, including when
# the compilation of the body raises.
def optimize_iter(ast, scope, want_value)
  b_args, b_body, b_recv = ast[1, 3]

  saved_iter_break = @iter_break
  # a new tmpvar, so we can overwrite it in 'break :foo'
  @iter_break = want_value ? get_new_tmp_var('iterbreak') : nil

  begin
    if b_recv[0] == :call
      # convert ary.reverse_each to ary.reverse.each, and hash.each_key to hash.keys.each
      via =
        case b_recv[2]
        when 'reverse_each'; 'reverse'
        when 'each_key'; 'keys'
        end
      if via
        b_recv = b_recv.dup
        b_recv[1] = [:call, b_recv[1], via]
        b_recv[2] = 'each'
      end
    end

    # unsigned int tmp var used as loop index
    new_counter = lambda {
      c = get_new_tmp_var('cntr')
      c.type = C::BaseType.new(:int, :unsigned)
      c
    }
    # for (cntr = 0 ; cntr < bound ; ++cntr) body
    counted_for = lambda { |c, bound, blk|
      C::For.new(C::CExpression[c, :'=', [[0], c.type]], C::CExpression[c, :<, bound], C::CExpression[:'++', c], blk)
    }

    # loop { }
    if b_recv[0] == :fcall and b_recv[2] == 'loop'
      body = C::Block.new(scope)
      ast_to_c(b_body, body, false)
      scope.statements << C::For.new(nil, nil, nil, body)

    # int.times { |i| }
    elsif b_recv[0] == :call and not b_recv[3] and b_recv[2] == 'times'
      limit = get_new_tmp_var('limit')
      recv = ast_to_c(b_recv[1], scope, limit)
      # bit 0 set is the Fixnum tag
      scope.statements << C::If.new(C::CExpression[:'!', [recv, :&, 1]], rb_raise('only Fixnum#times handled'), nil)
      # the default value of the expression is the receiver
      scope.statements << C::CExpression[@iter_break, :'=', recv] if want_value
      scope.statements << C::CExpression[limit, :'=', [recv, :>>, 1]]
      cntr = new_counter.call
      body = C::Block.new(scope)
      if b_args and b_args[0] == :dasgn_curr
        # block arg = counter converted back to a tagged Fixnum
        body.statements << C::CExpression[dvar(b_args[1]), :'=', [[cntr, :<<, 1], :|, 1]]
      end
      ast_to_c(b_body, body, false)
      scope.statements << counted_for.call(cntr, limit, body)

    # ary.each { |e| } / ary.find { |e| } / ary.map { |e| }
    elsif b_recv[0] == :call and not b_recv[3] and b_args and b_args[0] == :dasgn_curr and
        %w[each find map].include?(b_recv[2])
      iter = b_recv[2]

      ary = get_new_tmp_var('ary')
      recv = ast_to_c(b_recv[1], scope, ary)
      scope.statements << C::CExpression[ary, :'=', recv] if ary != recv

      unhandled =
        case iter
        when 'each'; 'only Array#each { |e| } handled'
        when 'find'; 'only Array#find { |e| } handled'
        else 'only Array#map { |e| } handled'
        end
      scope.statements << C::If.new(rb_test_class_ary(ary), nil, rb_raise(unhandled))

      if want_value
        # value of the expression when the loop runs to its end
        default =
          case iter
          when 'each'; ary
          when 'find'; rb_nil
          else fcall('rb_ary_new')
          end
        scope.statements << C::CExpression[@iter_break, :'=', default]
      end

      cntr = new_counter.call
      body = C::Block.new(scope)
      # block arg = ary[cntr]
      body.statements << C::CExpression[dvar(b_args[1]), :'=', [rb_ary_ptr(ary), :'[]', [cntr]]]

      case iter
      when 'each'
        ast_to_c(b_body, body, false)

      when 'find'
        # add a 'if (body_value) break ary[cntr];'
        # XXX 'find { next true }'
        found = ast_to_c(b_body, body)
        hit = C::Block.new(body)
        # without want_value there is no result variable to store into
        hit.statements << C::CExpression[@iter_break, :'=', rb_ary_ptr(ary, cntr)] if @iter_break
        hit.statements << C::Break.new
        body.statements << C::If.new(rb_test(found, body), hit, nil)

      else
        # map: add a '@iter_break << body_value'
        # XXX 'next' unhandled
        if @iter_break
          val = ast_to_c(b_body, body)
          body.statements << fcall('rb_ary_push', @iter_break, val)
        else
          # no result array was created: only run the body, as for #each
          ast_to_c(b_body, body, false)
        end
      end

      scope.statements << counted_for.call(cntr, rb_ary_len(ary), body)

    else
      return
    end

    @iter_break || nil.object_id
  ensure
    @iter_break = saved_iter_break
  end
end
1596
+ end
1597
+
1598
# a ruby2c C generator for use in any ruby interpreter
# (generates C suitable for use as a standard Ruby extension)
class RubyStaticCompiler < RubyLiveCompiler
  # Add the instance methods +methlist+ of +klass+ to the class-level compiler
  # (created on first use). Returns self.
  def self.compile(klass, *methlist)
    @rcp ||= new
    methlist.each { |meth|
      ast = RubyHack.read_method_ast(klass, meth)
      @rcp.compile(ast, klass, meth)
    }
    self
  end

  # Same as compile, for the singleton methods +methlist+ of +klass+.
  def self.compile_singleton(klass, *methlist)
    @rcp ||= new
    methlist.each { |meth|
      ast = RubyHack.read_singleton_method_ast(klass, meth)
      @rcp.compile(ast, klass, meth, true)
    }
    self
  end

  # Return the C source of everything compiled by the class-level compiler:
  # an ELF-only '.pt_gnu_stack rw' asm directive followed by the definition
  # of Init_compiledruby as dumped by the C parser.
  # Requires a previous call to compile or compile_singleton.
  def self.dump
    <<EOS + @rcp.cp.dump_definition('Init_compiledruby')
#ifdef __ELF__
asm .pt_gnu_stack rw;
#endif
EOS
  end

  # Return the generated C source.
  # With +m+ set, dump the definitions of +m+ and 'do_init_once';
  # with a nil/false +m+, dump the whole parser content.
  def dump(m="Init_compiledruby")
    m ? @cp.dump_definition(m, 'do_init_once') : @cp.to_s
  end

  # Setup the C parser (see the superclass), and add the skeleton of the
  # extension entrypoint: Init_compiledruby, which calls do_init_once.
  def initialize(cp=nil)
    super(cp)

    @cp.parse <<EOS
// static VALUE method(VALUE self, VALUE arg0, VALUE arg1) { return (VALUE)0; }
// static VALUE const_Lol;
static void do_init_once(void) {
// const_Lol = rb_const_get(*rb_cObject, rb_intern("Lol"));
// rb_define_method(const_Lol, "method", method, 2);
}

int Init_compiledruby(void) __attribute__((export)) {
// use a separate func to avoid having to append statements before the 'return'
do_init_once();
return 0;
}
EOS
  end

  # returns the 'do_init_once' function body
  def init
    @cp.toplevel.symbol['do_init_once'].initializer
  end

  # Compile one method (see the superclass), remember the resulting C function
  # so that other compiled methods can call it directly, and register it in
  # do_init_once with rb_define_method / rb_define_singleton_method.
  # Returns the value returned by the superclass, or nil if it returned nothing.
  def compile(ast, klass, method, singleton=false)
    @compiled_func_cache ||= {}

    mname = super(ast, klass, method, singleton)
    return if not mname

    @compiled_func_cache[[klass, method.to_s, singleton]] = @cur_cfunc

    cls = rb_const(nil, klass)

    init.statements << fcall("rb_define#{'_singleton' if singleton}_method", cls, method.to_s, @cur_cfunc, method_arity)

    mname
  end

  # Declare a new static toplevel C variable +name+ of type +type+.
  # The declaration is inserted before the first function definition of the
  # toplevel, or added at the end if there is none.
  # If +initializer+ is set, 'name = initializer' is added to do_init_once:
  # at the end for most variables, but 'intern_*' variables are inserted at
  # the beginning, in name order.
  # Returns the new C::Variable.
  def declare_newtopvar(name, initializer, type=value)
    v = C::Variable.new(name, type)
    v.storage = :static
    @cp.toplevel.symbol[v.name] = v

    toplevel_stmts = @cp.toplevel.statements
    first_funcdef = toplevel_stmts.find { |st|
      st.kind_of?(C::Declaration) and st.var.type.kind_of?(C::Function) and st.var.initializer
    }
    # the fallback must apply to the index, not to the object searched for
    pos = first_funcdef ? toplevel_stmts.index(first_funcdef) : -1
    toplevel_stmts.insert pos, C::Declaration.new(v)

    if initializer
      pos = -1
      if name =~ /^intern_/
        # skip the leading assignments to variables whose name sorts before ours
        pos = 0
        init.statements.each { |st|
          break unless st.kind_of?(C::CExpression) and st.op == :'=' and st.lexpr.kind_of?(C::Variable) and st.lexpr.name < name
          pos += 1
        }
      end
      init.statements.insert(pos, C::CExpression[v, :'=', initializer])
    end

    v
  end

  # Return the static C variable caching rb_intern(sym), declared on first use.
  def rb_intern(sym)
    n = escape_varname("intern_#{sym}")
    @cp.toplevel.symbol[n] || declare_newtopvar(n, fcall('rb_intern', sym.to_s), C::BaseType.new(:int, :unsigned))
  end

  # rb_const 'FOO', Bar::Baz ==>
  #  const_Bar = rb_const_get(rb_cObject, rb_intern("Bar"));
  #  const_Bar_Baz = rb_const_get(const_Bar, rb_intern("Baz"));
  #  const_Bar_Baz_FOO = rb_const_get(const_Bar_Baz, rb_intern("FOO"));
  # use rb_const(nil, class) to get a pointer to a class/module
  #
  # Each intermediate constant is resolved in the running interpreter with
  # const_get, and gets its own static C variable, reused on later calls.
  # Returns the C variable for the last path component.
  # Raises Fail if +owner+ is nil/false.
  def rb_const(constname, owner = resolve_const_owner(constname))
    raise Fail, "no dynamic constant resolution #{constname}" if not owner

    @const_value ||= { [::Object, 'Object'] => rb_global('rb_cObject') }

    k = ::Object
    v = nil
    cname = owner.name
    cname += '::' + constname if constname
    cname.split('::').each { |n|
      kk = k.const_get(n)
      if not v = @const_value[[k, n]]
        # class A ; end ; B = A => B.name => 'A'
        vn = "const_#{escape_varname((k.name + '::' + n).sub(/^Object::/, '').gsub('::', '_'))}"
        vi = fcall('rb_const_get', rb_const(nil, k), fcall('rb_intern', n))
        v = declare_newtopvar(vn, vi)
        # n wont be reused, so do not alloc a global intern_#{n} for this
        @const_value[[k, n]] = v
      end
      k = kk
    }
    v
  end

  # Return the C expression '*cname' for the toplevel symbol +cname+.
  # TODO remove this when the C compiler is fixed
  def rb_global(cname)
    C::CExpression[:*, @cp.toplevel.symbol[cname]]
  end

  # Return a C expression usable to call klass#method directly:
  # * the C function generated for it, if it was compiled by this compiler
  # * a static function pointer variable resolved in do_init_once, if the
  #   method is a :cfunc in the current interpreter
  # * nil otherwise
  def get_cfuncptr(klass, method, singleton=false)
    # is it a func we have in the current cparser ?
    # (the cache may not exist yet if nothing was compiled)
    if ptr = (@compiled_func_cache ||= {})[[klass, method.to_s, singleton]]
      return ptr
    end

    # check if it's a C or ruby func in the current interpreter
    cls = singleton ? (class << klass ; self ; end) : klass
    ptr = RubyHack.get_method_node_ptr(cls, method)
    return if ptr == 0
    ftype = RubyHack::NODETYPE[(RubyHack.memory_read_int(ptr) >> 11) & 0xff]
    return if ftype != :cfunc

    # ok, so assume it will be the same next time
    n = escape_varname "fptr_#{klass.name}#{singleton ? '.' : '#'}#{method}".gsub('::', '_')
    if not v = @cp.toplevel.symbol[n]
      v = get_cfuncptr_dyn(klass, method, singleton, n)
    end

    v
  end

  # Declare the static function pointer variable +n+ for klass#method, and add
  # to do_init_once the code that fills it when the extension is loaded:
  # read the method node with rb_method_node, raise unless it is a :cfunc,
  # then store the node's word 1 cast to the function pointer type.
  # The prototype is derived from the arity reported by the current interpreter.
  # Returns the new C::Variable.
  def get_cfuncptr_dyn(klass, method, singleton, n)
    arity = singleton ? klass.method(method).arity : klass.instance_method(method).arity
    fproto = C::Function.new(value, [])
    case arity
    when -1; fproto.args << C::Variable.new(nil, C::BaseType.new(:int)) << C::Variable.new(nil, C::Pointer.new(value)) << C::Variable.new(nil, value)
    when -2; fproto.args << C::Variable.new(nil, value) << C::Variable.new(nil, value)
    else (arity+1).times { fproto.args << C::Variable.new(nil, value) }
    end

    # scratch 'int *ptr' local of do_init_once, shared by all lookups
    if not ptr = init.symbol['ptr']
      ptr = C::Variable.new('ptr', C::Pointer.new(C::BaseType.new(:int)))
      init.symbol[ptr.name] = ptr
      init.statements << C::Declaration.new(ptr)
    end

    cls = rb_const(nil, klass)
    cls = fcall('rb_singleton_class', cls) if singleton
    init.statements << C::CExpression[ptr, :'=', fcall('rb_method_node', cls, rb_intern(method))]

    # dynamically recheck that klass#method is a :cfunc
    cnd = C::CExpression[[:'!', ptr], :'||', [[[[ptr, :'[]', [0]], :>>, [11]], :&, [0xff]], :'!=', [RubyHack::NODETYPE.index(:cfunc)]]]
    init.statements << C::If.new(cnd, rb_raise("CFunc expected at #{klass}#{singleton ? '.' : '#'}#{method}"), nil)

    vi = C::CExpression[[ptr, :'[]', [1]], C::Pointer.new(fproto)]
    declare_newtopvar(n, vi, C::Pointer.new(fproto))
  end

  if defined?($trace_rbfuncall) and $trace_rbfuncall
    # dynamic trace of all rb_funcall made from our module
    #
    # Every generated rb_funcall site gets a numeric id, and increments its
    # own slot of the C array rb_fcntr before doing the call.
    # rb_fcstat, registered with atexit, prints the non-zero counters.
    def rb_funcall(recv, meth, *args)
      if not defined? @rb_fcid
        # first call: add the C support code
        @cp.parse <<EOS
int atexit(void(*)(void));
int printf(char*, ...);

static unsigned rb_fcid_max = 1;
static unsigned rb_fcntr[1];

static void rb_fcstat(void)
{
unsigned i;
for (i=0 ; i<rb_fcid_max ; ++i)
if (rb_fcntr[i])
printf("%u %u\\n", i, rb_fcntr[i]);
}
EOS
        @rb_fcid = -1
        @rb_fcntr = @cp.toplevel.symbol['rb_fcntr']
        @rb_fcid_max = @cp.toplevel.symbol['rb_fcid_max']
        init.statements << fcall('atexit', @cp.toplevel.symbol['rb_fcstat'])
      end

      # allocate a new id, and grow the C counter array accordingly
      @rb_fcid += 1
      @rb_fcid_max.initializer = C::CExpression[[@rb_fcid+1], @rb_fcid_max.type]
      @rb_fcntr.type.length = @rb_fcid+1

      ctr = C::CExpression[:'++', [@rb_fcntr, :'[]', [@rb_fcid]]]
      C::CExpression[ctr, :',', super(recv, meth, *args)]
    end
  end
end
1815
+ end
1816
+
1817
+
1818
+
1819
+
1820
# Demo / command-line entrypoint, run when this file is the main script or
# when 'ignore_argv0' is found in ARGV (it is removed from ARGV).
# The first argument selects the demo:
#  (none)           JIT-compile a test method and time it
#  asm              patch in a method made of C with inline asm
#  generate         build compiledruby.so from a few metasm methods
#  Class#meth ...   dump the AST and the generated C for the listed methods
#                   (use Class.meth for singleton methods)
if __FILE__ == $0 or ARGV.delete('ignore_argv0')

  demos = { nil => :test_jit, 'asm' => :inlineasm, 'generate' => :generate_persistent }
  demo = demos.fetch(ARGV.first, :compile_ruby)

  case demo
  when :inlineasm
    # cnt.times { sys_write str }
    src_asm = <<EOS
mov ecx, [ebp+8]
again:
push ecx

mov eax, 4
mov ebx, 1
mov ecx, [ebp+12]
mov edx, [ebp+16]
int 80h

pop ecx
loop again
EOS

    src = <<EOS
#{Metasm::RubyLiveCompiler::RUBY_H}

void doit(int, char*, int);
VALUE foo(VALUE self, VALUE count, VALUE str) {
doit(VAL2INT(count), STR_PTR(str), STR_LEN(str));
return count;
}

void doit(int count, char *str, int strlen) { asm(#{src_asm.inspect}); }
EOS

    class Foo
    end

    # compile the C source, and install the binary as Foo#bar, with 2 arguments
    binary = Metasm::RubyHack.compile_c(src).encoded
    Metasm::RubyHack.set_method_binary(Foo, 'bar', binary, 2)

    Foo.new.bar(4, "blabla\n")
    Foo.new.bar(2, "foo\n")

  when :compile_ruby
    abort 'need <class#method>' if ARGV.empty?
    require 'pp'
    puts '#if 0'
    ARGV.each { |av|
      # Class::Path#method or Class::Path.method ; ignore anything else
      next unless av =~ /^(.*)([.#])(.*)$/
      cls_path, separator, meth = $1, $2, $3.to_sym
      singleton = (separator == '.')
      cls = cls_path.split('::').inject(::Object) { |namespace, cst| namespace.const_get(cst) }

      if singleton
        ast = Metasm::RubyHack.read_singleton_method_ast(cls, meth)
        cls.method(meth) unless ast	# raise NoMethodError
      else
        ast = Metasm::RubyHack.read_method_ast(cls, meth)
        cls.instance_method(meth) unless ast
      end

      puts ' --- ast ---'
      pp ast

      if singleton
        Metasm::RubyStaticCompiler.compile_singleton(cls, meth)
      else
        Metasm::RubyStaticCompiler.compile(cls, meth)
      end
    }
    puts '', ' --- C ---', '#endif'
    puts Metasm::RubyStaticCompiler.dump

  when :test_jit
    class Foo
      def bla(x=500)
        i = 0
        x.times { i += 16 }
        i
      end
    end

    started = Time.now
    Metasm::RubyLiveCompiler.compile(Foo, :bla)
    compiled = Time.now
    ret = Foo.new.bla(0x401_0000)
    puts ret.to_s(16), ret.class
    finished = Time.now

    puts "compile %.3fs run %.3fs" % [compiled-started, finished-compiled]

  when :generate_persistent
    static = Metasm::RubyStaticCompiler
    static.compile(Metasm::Preprocessor, :getchar, :ungetchar, :unreadtok, :readtok_nopp_str, :readtok_nopp, :readtok)
    static.compile(Metasm::Expression, :reduce_rec, :initialize)
    static.compile_singleton(Metasm::Expression, :[])
    c_src = static.dump
    if $VERBOSE
      File.open('compiledruby.c', 'w') { |fd| fd.puts c_src }
    end
    puts 'compiling..'
    # a missing compiledruby library is not an error here
    begin
      require 'compiledruby'
    rescue LoadError
    end
    # To encode to a different file, you must also rename the Init_compiledruby() function to match the lib name
    Metasm::ELF.compile_c(Metasm::Ia32.new, c_src).encode_file('compiledruby.so')
    puts 'ruby -r metasm -r compiledruby ftw'
  end

end