metasm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. data/BUGS +11 -0
  2. data/CREDITS +17 -0
  3. data/README +270 -0
  4. data/TODO +114 -0
  5. data/doc/code_organisation.txt +146 -0
  6. data/doc/const_missing.txt +16 -0
  7. data/doc/core_classes.txt +75 -0
  8. data/doc/feature_list.txt +53 -0
  9. data/doc/index.txt +59 -0
  10. data/doc/install_notes.txt +170 -0
  11. data/doc/style.css +3 -0
  12. data/doc/use_cases.txt +18 -0
  13. data/lib/metasm.rb +80 -0
  14. data/lib/metasm/arm.rb +12 -0
  15. data/lib/metasm/arm/debug.rb +39 -0
  16. data/lib/metasm/arm/decode.rb +167 -0
  17. data/lib/metasm/arm/encode.rb +77 -0
  18. data/lib/metasm/arm/main.rb +75 -0
  19. data/lib/metasm/arm/opcodes.rb +177 -0
  20. data/lib/metasm/arm/parse.rb +130 -0
  21. data/lib/metasm/arm/render.rb +55 -0
  22. data/lib/metasm/compile_c.rb +1457 -0
  23. data/lib/metasm/dalvik.rb +8 -0
  24. data/lib/metasm/dalvik/decode.rb +196 -0
  25. data/lib/metasm/dalvik/main.rb +60 -0
  26. data/lib/metasm/dalvik/opcodes.rb +366 -0
  27. data/lib/metasm/decode.rb +213 -0
  28. data/lib/metasm/decompile.rb +2659 -0
  29. data/lib/metasm/disassemble.rb +2068 -0
  30. data/lib/metasm/disassemble_api.rb +1280 -0
  31. data/lib/metasm/dynldr.rb +1329 -0
  32. data/lib/metasm/encode.rb +333 -0
  33. data/lib/metasm/exe_format/a_out.rb +194 -0
  34. data/lib/metasm/exe_format/autoexe.rb +82 -0
  35. data/lib/metasm/exe_format/bflt.rb +189 -0
  36. data/lib/metasm/exe_format/coff.rb +455 -0
  37. data/lib/metasm/exe_format/coff_decode.rb +901 -0
  38. data/lib/metasm/exe_format/coff_encode.rb +1078 -0
  39. data/lib/metasm/exe_format/dex.rb +457 -0
  40. data/lib/metasm/exe_format/dol.rb +145 -0
  41. data/lib/metasm/exe_format/elf.rb +923 -0
  42. data/lib/metasm/exe_format/elf_decode.rb +979 -0
  43. data/lib/metasm/exe_format/elf_encode.rb +1375 -0
  44. data/lib/metasm/exe_format/macho.rb +827 -0
  45. data/lib/metasm/exe_format/main.rb +228 -0
  46. data/lib/metasm/exe_format/mz.rb +164 -0
  47. data/lib/metasm/exe_format/nds.rb +172 -0
  48. data/lib/metasm/exe_format/pe.rb +437 -0
  49. data/lib/metasm/exe_format/serialstruct.rb +246 -0
  50. data/lib/metasm/exe_format/shellcode.rb +114 -0
  51. data/lib/metasm/exe_format/xcoff.rb +167 -0
  52. data/lib/metasm/gui.rb +23 -0
  53. data/lib/metasm/gui/cstruct.rb +373 -0
  54. data/lib/metasm/gui/dasm_coverage.rb +199 -0
  55. data/lib/metasm/gui/dasm_decomp.rb +369 -0
  56. data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
  57. data/lib/metasm/gui/dasm_graph.rb +1354 -0
  58. data/lib/metasm/gui/dasm_hex.rb +543 -0
  59. data/lib/metasm/gui/dasm_listing.rb +599 -0
  60. data/lib/metasm/gui/dasm_main.rb +906 -0
  61. data/lib/metasm/gui/dasm_opcodes.rb +291 -0
  62. data/lib/metasm/gui/debug.rb +1228 -0
  63. data/lib/metasm/gui/gtk.rb +884 -0
  64. data/lib/metasm/gui/qt.rb +495 -0
  65. data/lib/metasm/gui/win32.rb +3004 -0
  66. data/lib/metasm/gui/x11.rb +621 -0
  67. data/lib/metasm/ia32.rb +14 -0
  68. data/lib/metasm/ia32/compile_c.rb +1523 -0
  69. data/lib/metasm/ia32/debug.rb +193 -0
  70. data/lib/metasm/ia32/decode.rb +1167 -0
  71. data/lib/metasm/ia32/decompile.rb +564 -0
  72. data/lib/metasm/ia32/encode.rb +314 -0
  73. data/lib/metasm/ia32/main.rb +233 -0
  74. data/lib/metasm/ia32/opcodes.rb +872 -0
  75. data/lib/metasm/ia32/parse.rb +327 -0
  76. data/lib/metasm/ia32/render.rb +91 -0
  77. data/lib/metasm/main.rb +1193 -0
  78. data/lib/metasm/mips.rb +11 -0
  79. data/lib/metasm/mips/compile_c.rb +7 -0
  80. data/lib/metasm/mips/decode.rb +253 -0
  81. data/lib/metasm/mips/encode.rb +51 -0
  82. data/lib/metasm/mips/main.rb +72 -0
  83. data/lib/metasm/mips/opcodes.rb +443 -0
  84. data/lib/metasm/mips/parse.rb +51 -0
  85. data/lib/metasm/mips/render.rb +43 -0
  86. data/lib/metasm/os/gnu_exports.rb +270 -0
  87. data/lib/metasm/os/linux.rb +1112 -0
  88. data/lib/metasm/os/main.rb +1686 -0
  89. data/lib/metasm/os/remote.rb +527 -0
  90. data/lib/metasm/os/windows.rb +2027 -0
  91. data/lib/metasm/os/windows_exports.rb +745 -0
  92. data/lib/metasm/parse.rb +876 -0
  93. data/lib/metasm/parse_c.rb +3938 -0
  94. data/lib/metasm/pic16c/decode.rb +42 -0
  95. data/lib/metasm/pic16c/main.rb +17 -0
  96. data/lib/metasm/pic16c/opcodes.rb +68 -0
  97. data/lib/metasm/ppc.rb +11 -0
  98. data/lib/metasm/ppc/decode.rb +264 -0
  99. data/lib/metasm/ppc/decompile.rb +251 -0
  100. data/lib/metasm/ppc/encode.rb +51 -0
  101. data/lib/metasm/ppc/main.rb +129 -0
  102. data/lib/metasm/ppc/opcodes.rb +410 -0
  103. data/lib/metasm/ppc/parse.rb +52 -0
  104. data/lib/metasm/preprocessor.rb +1277 -0
  105. data/lib/metasm/render.rb +130 -0
  106. data/lib/metasm/sh4.rb +8 -0
  107. data/lib/metasm/sh4/decode.rb +336 -0
  108. data/lib/metasm/sh4/main.rb +292 -0
  109. data/lib/metasm/sh4/opcodes.rb +381 -0
  110. data/lib/metasm/x86_64.rb +12 -0
  111. data/lib/metasm/x86_64/compile_c.rb +1025 -0
  112. data/lib/metasm/x86_64/debug.rb +59 -0
  113. data/lib/metasm/x86_64/decode.rb +268 -0
  114. data/lib/metasm/x86_64/encode.rb +264 -0
  115. data/lib/metasm/x86_64/main.rb +135 -0
  116. data/lib/metasm/x86_64/opcodes.rb +118 -0
  117. data/lib/metasm/x86_64/parse.rb +68 -0
  118. data/misc/bottleneck.rb +61 -0
  119. data/misc/cheader-findpppath.rb +58 -0
  120. data/misc/hexdiff.rb +74 -0
  121. data/misc/hexdump.rb +55 -0
  122. data/misc/metasm-all.rb +13 -0
  123. data/misc/objdiff.rb +47 -0
  124. data/misc/objscan.rb +40 -0
  125. data/misc/pdfparse.rb +661 -0
  126. data/misc/ppc_pdf2oplist.rb +192 -0
  127. data/misc/tcp_proxy_hex.rb +84 -0
  128. data/misc/txt2html.rb +440 -0
  129. data/samples/a.out.rb +31 -0
  130. data/samples/asmsyntax.rb +77 -0
  131. data/samples/bindiff.rb +555 -0
  132. data/samples/compilation-steps.rb +49 -0
  133. data/samples/cparser_makestackoffset.rb +55 -0
  134. data/samples/dasm-backtrack.rb +38 -0
  135. data/samples/dasmnavig.rb +318 -0
  136. data/samples/dbg-apihook.rb +228 -0
  137. data/samples/dbghelp.rb +143 -0
  138. data/samples/disassemble-gui.rb +102 -0
  139. data/samples/disassemble.rb +133 -0
  140. data/samples/dump_upx.rb +95 -0
  141. data/samples/dynamic_ruby.rb +1929 -0
  142. data/samples/elf_list_needed.rb +46 -0
  143. data/samples/elf_listexports.rb +33 -0
  144. data/samples/elfencode.rb +25 -0
  145. data/samples/exeencode.rb +128 -0
  146. data/samples/factorize-headers-elfimports.rb +77 -0
  147. data/samples/factorize-headers-peimports.rb +109 -0
  148. data/samples/factorize-headers.rb +43 -0
  149. data/samples/gdbclient.rb +583 -0
  150. data/samples/generate_libsigs.rb +102 -0
  151. data/samples/hotfix_gtk_dbg.rb +59 -0
  152. data/samples/install_win_env.rb +78 -0
  153. data/samples/lindebug.rb +924 -0
  154. data/samples/linux_injectsyscall.rb +95 -0
  155. data/samples/machoencode.rb +31 -0
  156. data/samples/metasm-shell.rb +91 -0
  157. data/samples/pe-hook.rb +69 -0
  158. data/samples/pe-ia32-cpuid.rb +203 -0
  159. data/samples/pe-mips.rb +35 -0
  160. data/samples/pe-shutdown.rb +78 -0
  161. data/samples/pe-testrelocs.rb +51 -0
  162. data/samples/pe-testrsrc.rb +24 -0
  163. data/samples/pe_listexports.rb +31 -0
  164. data/samples/peencode.rb +19 -0
  165. data/samples/peldr.rb +494 -0
  166. data/samples/preprocess-flatten.rb +19 -0
  167. data/samples/r0trace.rb +308 -0
  168. data/samples/rubstop.rb +399 -0
  169. data/samples/scan_pt_gnu_stack.rb +54 -0
  170. data/samples/scanpeexports.rb +62 -0
  171. data/samples/shellcode-c.rb +40 -0
  172. data/samples/shellcode-dynlink.rb +146 -0
  173. data/samples/source.asm +34 -0
  174. data/samples/struct_offset.rb +47 -0
  175. data/samples/testpe.rb +32 -0
  176. data/samples/testraw.rb +45 -0
  177. data/samples/win32genloader.rb +132 -0
  178. data/samples/win32hooker-advanced.rb +169 -0
  179. data/samples/win32hooker.rb +96 -0
  180. data/samples/win32livedasm.rb +33 -0
  181. data/samples/win32remotescan.rb +133 -0
  182. data/samples/wintrace.rb +92 -0
  183. data/tests/all.rb +8 -0
  184. data/tests/dasm.rb +39 -0
  185. data/tests/dynldr.rb +35 -0
  186. data/tests/encodeddata.rb +132 -0
  187. data/tests/ia32.rb +82 -0
  188. data/tests/mips.rb +116 -0
  189. data/tests/parse_c.rb +239 -0
  190. data/tests/preprocessor.rb +269 -0
  191. data/tests/x86_64.rb +62 -0
  192. metadata +255 -0
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env ruby
2
+ # This file is part of Metasm, the Ruby assembly manipulation suite
3
+ # Copyright (C) 2006-2009 Yoann GUILLOT
4
+ #
5
+ # Licence is LGPL, see LICENCE in the top-level directory
6
+
7
+
8
+ #
9
+ # this script disassembles an executable (elf/pe) and dumps the output
10
+ # ruby -h for help
11
+ #
12
+
13
+ require 'metasm'
14
+ include Metasm
15
+ require 'optparse'
16
+
17
# parse arguments
# opts collects every command-line switch; :sc_cpu is the CPU class name used for shellcodes
opts = { :sc_cpu => 'Ia32' }
# keep a handle on the parser so that its help text can be shown on invalid invocations
optparser = OptionParser.new { |opt|
  opt.banner = 'Usage: disassemble.rb [options] <executable> [<entrypoints>]'
  opt.on('--no-data', 'do not display data bytes') { opts[:nodata] = true }
  opt.on('--no-data-trace', 'do not backtrace memory read/write accesses') { opts[:nodatatrace] = true }
  opt.on('--debug-backtrace', 'enable backtrace-related debug messages (very verbose)') { opts[:debugbacktrace] = true }
  opt.on('-c <header>', '--c-header <header>', 'read C function prototypes (for external library functions)') { |h| opts[:cheader] = h }
  opt.on('-o <outfile>', '--output <outfile>', 'save the assembly listing in the specified file (defaults to stdout)') { |h| opts[:outfile] = h }
  opt.on('--cpu <cpu>', 'the CPU class to use for a shellcode (Ia32, X64, ...)') { |c| opts[:sc_cpu] = c }
  opt.on('--exe <exe_fmt>', 'the executable file format to use (PE, ELF, ...)') { |c| opts[:exe_fmt] = c }
  opt.on('--rebase <addr>', 'rebase the loaded file to <addr>') { |a| opts[:rebase] = Integer(a) }
  opt.on('-s <savefile>', 'save the disassembler state after disasm') { |h| opts[:savefile] = h }
  opt.on('-S <addrlist>', '--stop <addrlist>', '--stopaddr <addrlist>', 'do not disassemble past these addresses') { |h| opts[:stopaddr] ||= [] ; opts[:stopaddr] |= h.split ',' }
  opt.on('-P <plugin>', '--plugin <plugin>', 'load a metasm disassembler plugin') { |h| (opts[:plugin] ||= []) << h }
  opt.on('--post-plugin <plugin>', 'load a metasm disassembler plugin after disassembly is finished') { |h| (opts[:post_plugin] ||= []) << h }
  opt.on('-e <code>', '--eval <code>', 'eval a ruby code') { |h| (opts[:hookstr] ||= []) << h }
  opt.on('--benchmark') { opts[:benchmark] = true }
  opt.on('--decompile') { opts[:decompile] = true }
  opt.on('--map <mapfile>') { |f| opts[:map] = f }
  opt.on('-a', '--autoload', 'loads all relevant files with same filename (.h, .map..)') { opts[:autoload] = true }
  opt.on('--fast', 'use disassemble_fast (no backtracking)') { opts[:fast] = true }
  opt.on('-v', '--verbose') { $VERBOSE = true }
  opt.on('-d', '--debug') { $DEBUG = $VERBOSE = true }
}
optparser.parse!(ARGV)

# the first positional argument is mandatory: without it the loader below
# would be handed nil and fail with an unrelated error, so show the usage instead
exename = ARGV.shift
abort optparser.help if not exename

t0 = Time.now if opts[:benchmark]

# load the file
if exename =~ /^live:(.*)/
  # 'live:<process>' disassembles the memory of a running process, seen as a shellcode
  raise 'no such live target' if not target = OS.current.find_process($1)
  p target if $VERBOSE
  exe = Shellcode.decode(target.memory, Metasm.const_get(opts[:sc_cpu]).new)
else
  # use the explicitly requested file format, or autodetect it and fall back to a raw shellcode
  exefmt = opts[:exe_fmt] ? Metasm.const_get(opts[:exe_fmt]) : AutoExe.orshellcode { Metasm.const_get(opts[:sc_cpu]).new }
  exefmt = exefmt.withcpu(Metasm.const_get(opts[:sc_cpu]).new) if opts[:exe_fmt] == 'Shellcode' and opts[:sc_cpu]
  exe = exefmt.decode_file(exename)
  exe.disassembler.rebase(opts[:rebase]) if opts[:rebase]
  if opts[:autoload]
    # look for companion files sharing the executable name minus its 1-3 char extension
    basename = exename.sub(/\.\w\w?\w?$/, '')
    opts[:map] ||= basename + '.map' if File.exist?(basename + '.map')
    opts[:cheader] ||= basename + '.h' if File.exist?(basename + '.h')
    (opts[:plugin] ||= []) << (basename + '.rb') if File.exist?(basename + '.rb')
  end
end
# set options
dasm = exe.init_disassembler
# convert a command-line address to something the disassembler understands:
# '1234h' is read as hex, anything else starting with a digit goes through Integer()
# (so 0x.. works), and the rest is handed to dasm.normalize
makeint = lambda { |addr|
  case addr
  when /^[0-9].*h/; addr.to_i(16)
  when /^[0-9]/; Integer(addr)
  else dasm.normalize(addr)
  end
}
dasm.load_map opts[:map] if opts[:map]
dasm.parse_c_file opts[:cheader] if opts[:cheader]
dasm.backtrace_maxblocks_data = -1 if opts[:nodatatrace]
dasm.debug_backtrace = true if opts[:debugbacktrace]
# flag the stop addresses as already decoded so that the disassembler does not go past them
opts[:stopaddr].to_a.each { |addr| dasm.decoded[makeint[addr]] = true }
opts[:plugin].to_a.each { |p| dasm.load_plugin p }
# NOTE: this deliberately evaluates the ruby code given with -e on the command line
opts[:hookstr].to_a.each { |f| eval f }

t1 = Time.now if opts[:benchmark]
# do the work
begin
  method = opts[:fast] ? :disassemble_fast_deep : :disassemble
  if ARGV.empty?
    exe.send(method)
  else
    # remaining arguments are additional entrypoints
    exe.send(method, *ARGV.map { |addr| makeint[addr] })
  end
rescue Interrupt
  # ^C stops the disassembly but still lets us dump what was found so far
  puts $!, $!.backtrace
end
t2 = Time.now if opts[:benchmark]

if opts[:decompile]
  # save the state before decompiling too, the final save below happens after it
  dasm.save_file(opts[:savefile]) if opts[:savefile]
  dasm.decompile(*dasm.entrypoints)
  tdc = Time.now if opts[:benchmark]
end

opts[:post_plugin].to_a.each { |p| dasm.load_plugin p }

dasm.save_file(opts[:savefile]) if opts[:savefile]

# output
if opts[:outfile]
  File.open(opts[:outfile], 'w') { |fd|
    # when decompiling, the C source comes first and the asm listing is wrapped in #if 0
    fd.puts dasm.c_parser if opts[:decompile]
    fd.puts "#if 0" if opts[:decompile]
    dasm.dump(!opts[:nodata]) { |l| fd.puts l }
    fd.puts "#endif" if opts[:decompile]
  }
elsif not opts[:savefile]
  # nothing is printed on stdout when only a savefile was requested
  if opts[:decompile]
    puts dasm.c_parser
  else
    dasm.dump(!opts[:nodata])
  end
end

t3 = Time.now if opts[:benchmark]

# format a duration in seconds as '1h32mn', '12mn5s' or '3.14s'
todate = lambda { |f|
  if f > 5400
    "#{f.to_i/3600}h#{(f.to_i%3600)/60}mn"
  elsif f > 90
    "#{f.to_i/60}mn#{f.to_i%60}s"
  else
    "#{'%.02f' % f}s"
  end
}

puts "durations\n load #{todate[t1-t0]}\n dasm #{todate[t2-t1]}#{"\n decomp "+todate[tdc-t2] if tdc}\n output #{todate[t3-(tdc||t2)]}\n total #{todate[t3-t0]}" if opts[:benchmark]
@@ -0,0 +1,95 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ # Original script and idea by Alexandre GAZET
4
+ #
5
+ # Licence is LGPL, see LICENCE in the top-level directory
6
+
7
+
8
+ #
9
+ # this script will load an upx-packed windows executable, find its
10
+ # original entrypoint by disassembling the UPX stub, set breakpoint on it,
11
+ # run the program, and dump the loaded image to an executable PE.
12
+ #
13
+ # usage: dump_upx.rb <packed.exe> [<dumped.exe>] [<rva iat>]
14
+ #
15
+
16
+ require 'metasm'
17
+ include Metasm
18
+
19
# Unpacks an UPX-packed windows executable by running it under a debugger
# until the original entrypoint (oep) is reached, then dumping its memory image.
class UPXUnpacker
  # loads the file
  # find the oep by disassembling
  # run it until the oep
  # dump the memory image
  #
  # file:: path of the packed executable
  # dumpfile:: path of the PE to write, defaults to 'upx-dumped.exe' when nil
  # iat_rva:: optional address of the import table, as an rva or a va; may be nil
  def initialize(file, dumpfile, iat_rva=nil)
    @dumpfile = dumpfile || 'upx-dumped.exe'
    @iat = iat_rva

    puts 'disassembling UPX loader...'
    pe = PE.decode_file(file)
    @oep = find_oep(pe)
    raise 'cant find oep...' if not @oep
    puts "oep found at #{Expression[@oep]}"
    @baseaddr = pe.optheader.image_base
    # the iat is optional: only convert it when one was actually supplied
    @iat -= @baseaddr if @iat and @iat > @baseaddr # va => rva

    @dbg = OS.current.create_process(file).debugger
    puts 'running...'
    debugloop
  end

  # disassemble the upx stub to find a cross-section jump (to the real entrypoint)
  # returns the normalized target address, or nil if no such jump is found
  def find_oep(pe)
    dasm = pe.disassemble_fast 'entrypoint'

    return if not jmp = dasm.decoded.find { |addr, di|
      # check only once per basic block
      next if not di.block_head?
      b = di.block
      # our target has only one follower
      next if b.to_subfuncret.to_a.length != 0 or b.to_normal.to_a.length != 1
      to = b.to_normal.first
      # ignore jump to unmapped address
      next if not s = dasm.get_section_at(to)
      # ignore jump to same section
      next if dasm.get_section_at(di.address) == s

      # gotcha !
      true
    }

    # now jmp is a couple [addr, di], we extract and normalize the oep from there
    dasm.normalize(jmp[1].block.to_normal.first)
  end

  # run the target until the oep breakpoint callback has done its job
  def debugloop
    # set up a oneshot breakpoint on oep
    @dbg.hwbp(@oep, :x, 1, true) { breakpoint_callback }
    @dbg.run_forever
    puts 'done'
  end

  # called when the oep is reached: dump the unpacked image to @dumpfile,
  # then kill the target whether or not the dump succeeded
  def breakpoint_callback
    puts 'breakpoint hit !'

    # dump the process
    # create a genuine PE object from the memory image
    dump = LoadedPE.memdump @dbg.memory, @baseaddr, @oep, @iat

    # the UPX loader unpacks everything in sections marked read-only in the PE header, make them writeable
    dump.sections.each { |s| s.characteristics |= ['MEM_WRITE'] }

    # write the PE file to disk
    dump.encode_file @dumpfile

    puts 'dump complete'
  ensure
    # kill the process
    @dbg.kill
  end
end
91
+
92
if __FILE__ == $0
  # args: packed [unpacked] [iat rva]
  # the packed executable is mandatory, show the usage instead of failing in the PE loader
  abort 'usage: dump_upx.rb <packed.exe> [<dumped.exe>] [<rva iat>]' if ARGV.empty?
  # a missing or non-numeric iat argument is passed on as nil (no iat)
  UPXUnpacker.new(ARGV.shift, ARGV.shift, (Integer(ARGV.shift) rescue nil))
end
@@ -0,0 +1,1929 @@
1
+ # This file is part of Metasm, the Ruby assembly manipulation suite
2
+ # Copyright (C) 2006-2009 Yoann GUILLOT
3
+ #
4
+ # Licence is LGPL, see LICENCE in the top-level directory
5
+
6
+ # This sample hacks in the ruby interpreter to allow dynamic loading of shellcodes as object methods
7
+ # It also allows raw modifications to the ruby interpreter memory, for all kinds of purposes
8
+ # Includes methods to dump the ruby parser AST from the interpreter memory
9
+ # elf/linux/x86 only
10
+
11
+ require 'metasm'
12
+
13
+
14
+ module Metasm
15
# Helpers built on DynLdr to poke into the running ruby interpreter:
# install raw machine code as method implementations, and read back the
# parser AST of existing methods from the interpreter memory (ruby1.8 only)
class RubyHack < DynLdr
  # basic C defs for ruby AST - ruby1.8 only !
  RUBY_INTERN_NODE = <<EOS
struct node {
long flags;
char *file;
long a1;
long a2;
long a3;
};
#define FL_USHIFT 11
#define nd_type(n) ((((struct node*)n)->flags >> FL_USHIFT) & 0xff)
EOS

  # node type names, indexed by the 8-bit type number found in the node flags
  NODETYPE = [
    :method, :fbody, :cfunc, :scope, :block,
    :if, :case, :when, :opt_n, :while,
    :until, :iter, :for, :break, :next,
    :redo, :retry, :begin, :rescue, :resbody,
    :ensure, :and, :or, :not, :masgn,
    :lasgn, :dasgn, :dasgn_curr, :gasgn, :iasgn,
    :cdecl, :cvasgn, :cvdecl, :op_asgn1, :op_asgn2,
    :op_asgn_and, :op_asgn_or, :call, :fcall, :vcall,
    :super, :zsuper, :array, :zarray, :hash,
    :return, :yield, :lvar, :dvar, :gvar, # 50
    :ivar, :const, :cvar, :nth_ref, :back_ref,
    :match, :match2, :match3, :lit, :str,
    :dstr, :xstr, :dxstr, :evstr, :dregx,
    :dregx_once, :args, :argscat, :argspush, :splat,
    :to_ary, :svalue, :block_arg, :block_pass, :defn,
    :defs, :alias, :valias, :undef, :class,
    :module, :sclass, :colon2, :colon3, :cref,
    :dot2, :dot3, :flip2, :flip3, :attrset,
    :self, :nil, :true, :false, :defined,
    :newline, :postexe, :alloca, :dmethod, :bmethod, # 100
    :memo, :ifunc, :dsym, :attrasgn, :last
  ]

  new_api_c 'void rb_define_method(uintptr_t, char *, uintptr_t (*)(), int)'
  new_api_c 'void *rb_method_node(uintptr_t, unsigned id)'

  class << self
    # make the bytes of +code+ read/write/executable, and register them
    # as the body of method +meth+ of +klass+, taking +nparams+ args
    def set_class_method_raw(klass, meth, code, nparams)
      codeptr = str_ptr(code)
      memory_perm(codeptr, code.length, 'rwx')
      rb_define_method(rb_obj_to_value(klass), meth, code, nparams)
    end

    # ask the interpreter for the node implementing +meth+ in +klass+
    # raises if klass is not a Module
    def get_method_node_ptr(klass, meth)
      unless klass.kind_of? Module
        raise "#{klass.inspect} is not a class"
      end
      rb_method_node(rb_obj_to_value(klass), meth.to_sym.to_i)
    end

    # sets up rawopcodes as the method implementation for class klass
    # rawopcodes must implement the expected ABI or things will break horribly
    # this method is VERY UNSAFE, and breaks everything put in place by the ruby interpreter
    # use with EXTREME CAUTION
    # when nargs is omitted, the arity of the existing method is reused
    # nargs  arglist
    # -2     self, arg_ary
    # -1     argc, VALUE*argv, self
    # >=0    self, arg0, arg1..
    def set_method_binary(klass, methodname, raw, nargs=nil)
      nargs ||= klass.instance_method(methodname).arity
      if raw.kind_of? EncodedData
        # resolve the external symbols and relocate the code at its final address
        bd = raw.binding(str_ptr(raw.data))
        raw.reloc_externals.uniq.each { |ext|
          addr = sym_addr(0, ext)
          bd[ext] = addr
          raise "unknown symbol #{ext}" if not addr
        }
        raw.fixup(bd)
        raw = raw.data
      end
      # keep a reference on the code string for as long as the method may be called
      (@@prevent_gc ||= {})[[klass, methodname]] = raw
      set_class_method_raw(klass, methodname.to_s, raw, nargs)
    end

    # same as set_method_binary but with an object and not a class
    def set_singleton_method_binary(obj, *a)
      singleton = (class << obj ; self ; end)
      set_method_binary(singleton, *a)
    end

    # returns the AST (nested arrays, see read_node) of an instance method of klass
    def read_method_ast(klass, meth)
      nodeptr = get_method_node_ptr(klass, meth)
      read_node nodeptr
    end

    # same as read_method_ast, for a method defined on the singleton class of +klass+
    def read_singleton_method_ast(klass, meth)
      read_method_ast((class << klass ; self ; end), meth)
    end

    # decode the interpreter node at address +ptr+ into nested arrays [type, ...]
    # returns nil for the pointer values 0 and 4
    # +cur+ is the list being accumulated when walking chained :block/:array/:hash/:cref nodes
    # the shift by 11 is FL_USHIFT; the 3 node payload words are read at offsets 8, 12 and 16
    def read_node(ptr, cur=nil)
      return if ptr == 0 or ptr == 4

      type = NODETYPE[(memory_read_int(ptr) >> 11) & 0xff]
      a1 = memory_read_int(ptr+8)
      a2 = memory_read_int(ptr+12)
      a3 = memory_read_int(ptr+16)

      case type
      when :block, :array, :hash
        # linked list: a1 is the element, a3 the next link; flatten it into one array
        cur = [type] unless cur and cur[0] == type
        cur << read_node(a1)
        nxt = read_node(a3, cur)
        raise "block->next = #{nxt.inspect}" if nxt and nxt[0] != type
        cur
      when :cref
        # same list layout, holding ruby objects instead of nodes
        cur = [type] unless cur and cur[0] == type
        cur << rb_value_to_obj(a1) if a1 != 0 and a1 != 4
        nxt = read_node(a3, cur)
        raise "block->next = #{nxt.inspect}" if nxt and nxt[0] != type
        cur
      when :newline
        read_node(a3) # debug/trace usage only
      when :cfunc
        # the arity is stored as an unsigned word, map it back to -1/-2 (32 and 64 bits)
        arity = case a2
                when 0xffffffff, 0xffffffffffffffff; -1
                when 0xfffffffe, 0xfffffffffffffffe; -2
                else a2
                end
        [type, {:fptr => a1, # c func pointer
                :arity => arity}]
      when :scope
        localnr = (a1 != 0 && a1 != 4 ? memory_read_int(a1) : 0) # nr of local vars (+2 for $_/$~)
        cref = read_node(a2)[1..-1] # node, starting point for const/@@var resolution
        [type, {:localnr => localnr, :cref => cref}, read_node(a3)]
      when :if, :masgn, :when, :iter, :block_pass
        # nodes with 3 child nodes
        # :masgn      multiple assignment: a, b = 42 / lambda { |x, y| }.call(1, 2)
        #             a3 = remainder storage (a, b, *c = ary => a3=c)
        # :when       [:when, [:array, [test]], then, else]
        # :iter       a1 = assignments with nil, not realized, just to store the arg list (multi args -> :masgn)
        #             a2 = the body statements (multi -> :block)
        #             a3 = the stuff which is passed the block, probably a :call
        # :block_pass [args, block, receiver]: foo(bar, &baz) => [:bpass, [:array, bar], [:lvar, baz], [:call, 'foo', bar]] (args in a1&a3!)
        [type, read_node(a1), read_node(a2), read_node(a3)]
      when :call, :fcall, :vcall
        [type, read_node(a1), a2.id2name, read_node(a3)]
      when :attrasgn
        recv = (a1 == 1) ? :self : read_node(a1)
        [type, recv, a2.id2name, read_node(a3)]
      when :dstr
        ret = [type, [:str, rb_value_to_obj(a1)]]
        if args = read_node(a3)
          raise "#{ret.inspect} with args != array: #{args.inspect}" if args[0] != :array
          ret.concat args[1..-1]
        end
        ret
      when :zarray
        [:array]
      when :lasgn
        [type, a3, read_node(a2)]
      when :iasgn, :dasgn, :dasgn_curr, :gasgn, :cvasgn
        [type, a1.id2name, read_node(a2)]
      when :lvar
        [type, a3]
      when :ivar, :dvar, :gvar, :cvar, :const, :attrset
        [type, a1.id2name]
      when :str
        # cannot use _id2ref here, probably the parser does not use standard alloced objects
        [type, memory_read(memory_read_int(a1+12), memory_read_int(a1+16))]
      when :lit
        [type, rb_value_to_obj(a1)]
      when :args # specialcased by rb_call0, invalid in rb_eval
        cnt = a3 # nr of required args, copied directly to local_vars
        opt = read_node(a1) # :block to execute for each missing arg / with N optargs specified, skip N 1st statements
        rest = read_node(a2) # catchall arg in def foo(rq1, rq2, *rest)
        [type, cnt, opt, rest]
      when :and, :or, :dot2, :dot3
        # 2 child nodes (:and/:or are shortcircuit)
        [type, read_node(a1), read_node(a2)]
      when :not, :evstr
        [type, read_node(a2)]
      when :nil, :true, :false, :self, :redo, :retry
        [type]
      when :case
        # [:case, var_test, [:when, cnd, action, [:when, cnd2, action2, else]]]
        # => [:case, var_test, [:when, cnd, action], [:when, cnd2, action], else]
        cs = [type, read_node(a1), read_node(a2)]
        while cs[-1][0] == :when and cs[-1][3]
          cs << cs[-1].pop
        end
        cs
      when :while, :until, :argscat
        [type, read_node(a1), read_node(a2), a3]
      when :return, :break, :next, :defined, :to_ary, :splat
        [type, read_node(a1)]
      when :colon2
        [type, read_node(a1), a2.id2name]
      when :colon3 # ::Stuff
        [type, a2.id2name]
      when :method
        [type, a1, read_node(a2), a3]
      when :alias
        [type, a1, a2, a3] # ?
      when :block_arg
        [type, a1.id2name, a2, a3]
      when :ensure
        [type, read_node(a1), a2, read_node(a3)]
      else
        puts "unhandled #{type.inspect}"
        [type, a1, a2, a3]
      end
    end
  end # class << self
end
231
+
232
+ # a ruby2c C generator for use in the current ruby interpreter
233
+ # generates C suitable for shellcode compilation & insertion in the current interpreter
234
+ # has hardcoded addresses etc
235
+ class RubyLiveCompiler
236
+ attr_accessor :cp
237
+
238
+ RUBY_H = <<EOS
239
+ #{DynLdr::RUBY_H}
240
+
241
+ VALUE rb_ivar_get(VALUE, unsigned);
242
+ VALUE rb_ivar_set(VALUE, unsigned, VALUE);
243
+ VALUE rb_ivar_defined(VALUE, unsigned);
244
+ VALUE rb_cvar_get(VALUE, unsigned);
245
+ VALUE rb_cvar_set(VALUE, unsigned, VALUE, int);
246
+ VALUE rb_gv_get(const char*);
247
+ VALUE rb_gv_set(const char*, VALUE);
248
+
249
+ VALUE rb_ary_new(void);
250
+ VALUE rb_ary_new4(long, VALUE*);
251
+ VALUE rb_ary_push(VALUE, VALUE);
252
+ VALUE rb_ary_pop(VALUE);
253
+ VALUE rb_ary_shift(VALUE);
254
+ VALUE rb_hash_new(void);
255
+ VALUE rb_hash_aset(VALUE, VALUE, VALUE);
256
+ VALUE rb_str_new(const char*, long);
257
+ VALUE rb_str_new2(const char*);
258
+ VALUE rb_str_cat2(VALUE, const char*);
259
+ VALUE rb_str_concat(VALUE, VALUE);
260
+ VALUE rb_str_append(VALUE, VALUE);
261
+ VALUE rb_obj_as_string(VALUE);
262
+ VALUE rb_range_new(VALUE, VALUE, int exclude_end);
263
+ VALUE rb_Array(VALUE); // :splat
264
+ VALUE rb_ary_to_ary(VALUE);
265
+ VALUE rb_hash_aref(VALUE, VALUE);
266
+ VALUE rb_funcall3(VALUE, unsigned, int, VALUE*);
267
+ VALUE rb_singleton_class(VALUE);
268
+ VALUE rb_block_proc(void);
269
+ void rb_define_method(VALUE, char *, VALUE (*)(), int);
270
+ void *rb_method_node(VALUE, unsigned);
271
+ EOS
272
+
273
# raised when the compiler meets a ruby construct it cannot translate
Fail = Class.new(RuntimeError)
275
+
276
# compile each method of +methlist+ (instance methods of +klass+) to native code,
# and install the result as the new method implementation
# the methods for which the compiler returns nothing are left untouched
# one compiler instance is created on first use and shared by all calls
# returns self
def self.compile(klass, *methlist)
  @rcp ||= new
  methlist.each do |meth|
    cfunc = @rcp.compile(RubyHack.read_method_ast(klass, meth), klass, meth)
    if cfunc
      csrc = @rcp.cp.dump_definition(cfunc)
      RubyHack.set_method_binary(klass, meth, RubyHack.compile_c(csrc).encoded)
    end
  end
  self
end
287
+
288
# returns the C source of the definition +m+, or of the whole parser content if m is omitted
def dump(m=nil)
  return @cp.to_s if not m
  @cp.dump_definition(m)
end
291
+
292
+ attr_accessor :optim_hint
293
# cp is the C parser receiving the generated code; when omitted, a new one
# is requested from the host cpu
# the parser is fed the RUBY_H declarations right away
def initialize(cp=nil)
  @iter_break = nil
  @optim_hint = {}
  @cp = (cp ? cp : DynLdr.host_cpu.new_cparser)
  @cp.parse RUBY_H
end
299
+
300
# translate the ruby AST of method +meth+ of +klass+ into a new C function
# added to @cp; +singleton+ tells if meth is a singleton method of klass
# handles :ivar (attr_reader), :attrset (attr_writer) and :scope (regular method)
# asts, raises on any other
# returns the name of the C function, or nil if there is no ast
def compile(ast, klass, meth, singleton=false)
  return if not ast

  # TODO handle arbitrary block/yield constructs
  # TODO analyse to find/optimize numeric locals that never need a ruby VALUE (ie native int vs INT2FIX)
  # TODO detect block/closure exported out of the func & abort compilation

  @klass, @meth, @meth_singleton = klass, meth, singleton

  # C function name: m_<class><. or #><method>, turned into a valid identifier
  sep = singleton ? '.' : '#'
  mname = escape_varname("m_#{@klass}#{sep}#{@meth}".gsub('::', '_'))
  @cp.parse "static void #{mname}(VALUE self) { }"
  @cur_cfunc = @cp.toplevel.symbol[mname]
  # return type = VALUE, w/o 'missing return statement' warning
  @cur_cfunc.type.type = value

  @scope = @cur_cfunc.initializer

  ret = case ast[0]
        when :ivar # attr_reader
          fcall('rb_ivar_get', rb_self, rb_intern(ast[1]))
        when :attrset # attr_writer
          compile_args(@cur_cfunc, [nil, 1])
          fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), local(2))
        when :scope # standard ruby function
          @cref = ast[1][:cref]
          body = ast[2]
          # the argument list, if any, is the first statement of the body block
          if body and body[0] == :block and body[1] and body[1][0] == :args
            compile_args(@cur_cfunc, body[1])
          end
          # the value of an instance 'initialize' is dropped, return nil from there
          want_value = !(meth.to_s == 'initialize' and not singleton)
          val = ast_to_c(body, @scope, want_value)
          want_value ? val : rb_nil
          #when :cfunc # native ruby extension
        else
          raise "unhandled function ast #{ast.inspect}"
        end

  @scope.statements << C::Return.new(ret)

  mname
end
345
+
346
# arity of the method +name+ (default: the method being compiled) as seen from
# @klass: a singleton method when @meth_singleton is set, an instance method otherwise
def method_arity(name=@meth)
  if @meth_singleton
    @klass.method(name).arity
  else
    @klass.instance_method(name).arity
  end
end
350
+
351
# walk the @cref scopes in order, and return the first one that lists
# +constname+ (String or Symbol) among its constants, nil if there is none
def resolve_const_owner(constname)
  wanted = constname.to_s
  @cref.find { |scope| scope.constants.any? { |c| c.to_s == wanted } }
end
355
+
356
+ # checks if ast maps to a constant, returns it if it does
357
+ def check_const(ast)
358
+ case ast[0]
359
+ when :const
360
+ resolve_const_owner(ast[1])
361
+ when :colon2
362
+ if cst = check_const(ast[1])
363
+ cst.const_get(ast[2])
364
+ end
365
+ when :colon3
366
+ ::Object.const_get(ast[2])
367
+ end
368
+ end
369
+
370
# declare the arguments of the C function +func+ from the :args node +args+
# ([:args, nr_required, optargs, rest]), according to the arity of the ruby method
def compile_args(func, args)
  arity = method_arity
  if arity == -1 # args[1] == 0 and (args[2] or args[3])
    compile_args_m1(func, args)
  elsif arity == -2 # args[1] > 0 and (args[2] or args[3])
    compile_args_m2(func, args)
  else
    # fixed arity = args[1]: VALUE func(VALUE self, VALUE local_2, VALUE local_3)
    args[1].times { |idx|
      var = C::Variable.new("local_#{idx+2}", value)
      func.type.args << var
      @scope.symbol[var.name] = var
    }
  end
end
385
+
386
# update func prototype to reflect arity -1
# VALUE func(int argc, VALUE *argv, VALUE self)
# args is the :args node [:args, nr_required, optargs, rest], where optargs is
# nil or [:block, [:lasgn, ..]..] and rest is nil or a [:lasgn, ..]
# raises Fail when optargs or rest have another shape
def compile_args_m1(func, args)
  c = C::Variable.new("arg_c", C::BaseType.new(:int, :unsigned))
  v = C::Variable.new("arg_v", C::Pointer.new(value))
  @scope.symbol[c.name] = c
  @scope.symbol[v.name] = v
  # prepend argc and argv before the existing parameter(s)
  func.type.args.unshift v
  func.type.args.unshift c

  # required args are copied from argv to the first locals
  args[1].times { |i|
    local(i+2, C::CExpression[v, :'[]', [i]])
  }

  if args[2]
    # optional args: take the value from argv if it was passed, else run the default assignment
    # [:block, [:lasgn, 2, [:lit, 4]]]
    raise Fail, "unhandled vararglist #{args.inspect}" if args[2][0] != :block
    args[2][1..-1].each_with_index { |a, i|
      raise Fail, "unhandled arg #{a.inspect}" if a[0] != :lasgn
      cnd = C::CExpression[c, :>, i]
      thn = C::CExpression[local(a[1], :none), :'=', [v, :'[]', [i]]]
      els = C::Block.new(@scope)
      ast_to_c(a, els, false)
      @scope.statements << C::If.new(cnd, thn, els)
    }
  end

  if args[3]
    # rest arg: an array holding whatever comes after the required and optional args
    raise Fail, "unhandled vararglist3 #{args.inspect}" if args[3][0] != :lasgn
    # args[2] is nil for a method without optional args (def foo(*rest)):
    # there is then nothing more than the required args to skip
    optcount = args[2] ? args[2].length - 1 : 0
    skiplen = args[1] + optcount
    alloc = fcall('rb_ary_new4', [c, :-, [skiplen]], [v, :+, [skiplen]])
    local(args[3][1], C::CExpression[[c, :>, skiplen], :'?:', [alloc, fcall('rb_ary_new')]])
  end
end
420
+
421
# update func prototype to reflect arity -2
# VALUE func(VALUE self, VALUE arg_array)
# every argument is taken from the head of the array with rb_ary_shift
def compile_args_m2(func, args)
  arglist = C::Variable.new("arglist", value)
  @scope.symbol[arglist.name] = arglist
  func.type.args << arglist

  # fixed arguments
  args[1].times { |i| local(i+2, fcall('rb_ary_shift', arglist)) }

  # populate arguments with default values
  if defaults = args[2]
    # [:block, [:lasgn, 2, [:lit, 4]]]
    raise Fail, "unhandled vararglist #{args.inspect}" if defaults[0] != :block
    defaults[1..-1].each { |dflt|
      raise Fail, "unhandled arg #{dflt.inspect}" if dflt[0] != :lasgn
      # take the value from the array while it has elements left, use the default otherwise
      from_list = C::CExpression[local(dflt[1], :none), :'=', fcall('rb_ary_shift', arglist)]
      from_default = C::Block.new(@scope)
      ast_to_c([:lasgn, dflt[1], dflt[2]], from_default, false)
      @scope.statements << C::If.new(rb_ary_len(arglist), from_list, from_default)
    }
  end

  # whatever is left in the array is the rest argument
  if rest = args[3]
    raise Fail, "unhandled vararglist3 #{args.inspect}" if rest[0] != :lasgn
    local(rest[1], C::CExpression[arglist])
  end
end
450
+
451
# compile a case/when
# create a real C switch() for Fixnums, and put the others === in the default case
# XXX will get the wrong order for "case x; when 1; when Fixnum; when 3;" ...
# ast is [:case, tested_expr, [:when, [:array, values...], body]..., else_body]
# returns the variable holding the value of the case (nil if no variable was needed)
def compile_case(ast, scope, want_value)
  # this generates
  # var = stuff_to_test()
  # if (var & 1)
  #   switch (var >> 1) {
  #      case 12:
  #          stuff();
  #          break;
  #      default:
  #          goto default_case;
  #   }
  # else
  # default_case:
  #   if (var == true.object_id || rb_test(rb_funcall(bla, '===', var)))
  #      foo();
  #   else {
  #      default();
  #   }
  #
  if want_value == true
    ret = get_new_tmp_var('case', want_value)
    want_value = ret
  elsif want_value
    ret = want_value
  end

  var = ast_to_c(ast[1], scope, want_value || true)
  if not var.kind_of? C::Variable
    # the tested value is used many times: store it in a variable
    ret ||= get_new_tmp_var('case', want_value)
    scope.statements << C::CExpression[ret, :'=', var]
    var = ret
  end

  # the scope to put all case int in
  body_int = C::Block.new(scope)
  # the scope to put the if (cs === var) cascade
  body_other_head = body_other = nil
  default = nil

  ast[2..-1].each { |cs|
    if cs[0] == :when
      raise Fail if cs[1][0] != :array

      # numeric case, add a case to body_int
      if cs[1][1..-1].all? { |cd| cd[0] == :lit and (cd[1].kind_of? Fixnum or cd[1].kind_of? Range) }
        cs[1][1..-1].each { |cd|
          if cd[1].kind_of? Range
            b = cd[1].begin
            e = cd[1].end
            e -= 1 if cd[1].exclude_end?
            raise Fail unless b.kind_of? Integer and e.kind_of? Integer
            body_int.statements << C::Case.new(b, e, nil)
          else
            body_int.statements << C::Case.new(cd[1], nil, nil)
          end
        }
        cb = C::Block.new(scope)
        v = ast_to_c(cs[2], cb, want_value)
        cb.statements << C::CExpression[ret, :'=', v] if want_value and v != ret
        cb.statements << C::Break.new
        body_int.statements << cb

      # non-numeric (or mixed) case, add if ( cs === var )
      else
        cnd = nil
        cs[1][1..-1].each { |cd|
          if (cd[0] == :lit and (cd[1].kind_of?(Fixnum) or cd[1].kind_of?(Symbol))) or
              [:nil, :true, :false].include?(cd[0])
            # true C equality
            cd = C::CExpression[var, :==, ast_to_c(cd, scope)]
          else
            # own block for ast_to_c to honor lazy evaluation
            tb = C::Block.new(scope)
            test = rb_test(rb_funcall(ast_to_c(cd, tb), '===', var), tb)
            # discard own block unless needed
            if tb.statements.empty?
              cd = test
            else
              tb.statements << test
              cd = C::CExpression[tb, value]
            end
          end
          cnd = (cnd ? C::CExpression[cnd, :'||', cd] : cd)
        }
        cb = C::Block.new(scope)
        v = ast_to_c(cs[2], cb, want_value)
        cb.statements << C::CExpression[ret, :'=', v] if want_value and v != ret

        fu = C::If.new(cnd, cb, nil)

        # chain the new test in the else of the previous one
        if body_other
          body_other.belse = fu
        else
          body_other_head = fu
        end
        body_other = fu
      end

    # default case statement
    else
      cb = C::Block.new(scope)
      v = ast_to_c(cs, cb, want_value)
      cb.statements << C::CExpression[ret, :'=', v] if want_value and v != ret
      default = cb
    end
  }

  # if we use the value of the case, we must add an 'else: nil'
  if want_value and not default
    default = C::Block.new(scope)
    default.statements << C::CExpression[ret, :'=', rb_nil]
  end

  # assemble everything
  scope.statements <<
    if body_int.statements.empty?
      if body_other
        body_other.belse = default
        body_other_head
      else
        raise Fail, "empty case? #{ast.inspect}" if not default
        default
      end
    else
      # code to run when no integer case matches:
      # the === cascade (ending with the default), or the default alone
      if body_other
        body_other.belse = default if default
        fallback = body_other_head
      else
        fallback = default
      end
      if fallback
        # reached both from the switch 'default:' and when var is not a Fixnum
        @default_label_cnt ||= 0
        dfl = "default_label_#{@default_label_cnt += 1}"
        fallback = C::Label.new(dfl, fallback)
        body_int.statements << C::Case.new('default', nil, C::Goto.new(dfl))
      end
      body_int = C::Switch.new(C::CExpression[var, :>>, 1], body_int)
      C::If.new(C::CExpression[var, :&, 1], body_int, fallback)
    end

  ret
end
591
+
592
# create a C::CExpr[toplevel.symbol[name], :funcall, args]
# Integer and String arguments are wrapped in an Array before being passed to CExpression
# raises if fname has no prototype in the toplevel scope of @cp
def fcall(fname, *arglist)
  wrapped = arglist.map { |arg|
    case arg
    when Integer, String then [arg]
    else arg
    end
  }
  proto = @cp.toplevel.symbol[fname]
  raise "need prototype for #{fname}!" unless proto
  C::CExpression[proto, :funcall, wrapped]
end
600
+
601
# the VALUE typedef, as found in the toplevel scope of the C parser @cp
def value
  toplevel = @cp.toplevel
  toplevel.symbol['VALUE']
end
605
+
606
# declare a new function variable of type VALUE
# registers it in @scope and appends its declaration to the statements of @scope
# no initializer if initializer == :none
# returns the new variable
def declare_newvar(name, initializer)
  var = C::Variable.new(name, value)
  var.initializer = initializer unless initializer == :none
  @scope.symbol[var.name] = var
  @scope.statements << C::Declaration.new(var)
  var
end
615
+
616
# return a string suitable for use as a variable name
# hexencode any char not in [A-Za-z0-9_]
def escape_varname(n)
  n.gsub(/\W/) { |ch| ch.unpack('H*').first }
end
621
+
622
# retrieve or create a local var
# name is escaped first; an existing variable of @scope is returned as is
# pass :none to avoid initializer, a nil/false initializer means ruby nil
def get_var(name, initializer=:none)
  cname = escape_varname(name)
  @scope.symbol[cname] ||= declare_newvar(cname, initializer || rb_nil)
end
628
+
629
# create a new temporary variable named tmp_<base>_<counter> (tmp_<counter> without base)
# if var is already a C::Variable, it is returned and nothing is created
# XXX put_var ?
def get_new_tmp_var(base=nil, var=nil)
  return var if var.kind_of? C::Variable
  @tmp_var_id ||= 0
  @tmp_var_id += 1
  prefix = base ? "tmp_#{base}_" : "tmp_"
  get_var(prefix + @tmp_var_id.to_s)
end
636
+
637
# retrieve/create the local variable number n (named local_<n>), with optional initializer
# pass :none to avoid initializer
def local(n, init=nil)
  varname = "local_#{n}"
  get_var(varname, init)
end
641
+
642
# retrieve/create a new dynamic variable (block argument/variable), named dvar_<n>
# pass :none to avoid initializer
def dvar(n, init=nil)
  varname = "dvar_#{n}"
  get_var(varname, init)
end
647
+
648
# retrieve self (1st func arg), ie the symbol 'self' of @scope
def rb_self
  symbols = @scope.symbol
  symbols['self']
end
652
+
653
# returns a CExpr casting expr to a VALUE* and reading the element at index idx
def rb_cast_pvalue(expr, idx)
  pvalue = C::Pointer.new(value)
  C::CExpression[[[expr], pvalue], :'[]', [idx]]
end
657
+
658
# retrieve the current class, from self->klass (read as ((VALUE*)self)[1])
# XXX will segfault with self.kind_of? Fixnum/true/false/nil/sym
def rb_selfclass
  me = rb_self
  rb_cast_pvalue(me, 1)
end
663
+
664
# C expression for ruby nil: the object_id of nil in the current interpreter, cast to VALUE
def rb_nil
  nil_id = nil.object_id
  C::CExpression[[nil_id], value]
end
667
# C expression for ruby true: the object_id of true in the current interpreter, cast to VALUE
def rb_true
  true_id = true.object_id
  C::CExpression[[true_id], value]
end
670
# C expression for ruby false: the object_id of false in the current interpreter, cast to VALUE
def rb_false
  false_id = false.object_id
  C::CExpression[[false_id], value]
end
673
+
674
# equivalent of calling rb_intern on a string
# no call is generated: the integer value of the symbol in the current interpreter is used
def rb_intern(n)
  sym = n.to_sym
  C::CExpression[sym.to_i]
end
679
+
680
# create a rb_funcall construct: rb_funcall(recv, rb_intern(meth), argcount, args...)
def rb_funcall(recv, meth, *args)
  call_args = [recv, rb_intern(meth), args.length] + args
  fcall('rb_funcall', *call_args)
end
684
+
685
# ruby bool test of a var
# when nil or false has object_id 0, a single test is enough: (expr | nf) != nf
# otherwise expr is assigned to a temporary var in scope (unless it is already a variable),
# which is checked against both false and nil
def rb_test(expr, scope)
  nil_id = nil.object_id
  false_id = false.object_id
  if nil_id == 0 or false_id == 0 # just to be sure
    mask = nil_id | false_id
    return C::CExpression[[expr, :|, mask], :'!=', mask]
  end

  if expr.kind_of? C::Variable
    tested = expr
  else
    tested = get_new_tmp_var('test')
    scope.statements << C::CExpression[tested, :'=', expr]
  end
  C::CExpression[[tested, :'!=', rb_nil], :'&&', [tested, :'!=', rb_false]]
end
701
+
702
# generate C code to raise an exception with message reason
# cls is the name of the C global holding the exception class (RuntimeError by default)
def rb_raise(reason, cls='rb_eRuntimeError')
  exc_class = rb_global(cls)
  fcall('rb_raise', exc_class, reason)
end
706
+
707
# return a C expr equivalent to TYPE(expr) == type for non-immediate types:
#  expr > 7 && (expr & 3) == 0 && (((VALUE*)expr)[0] & 0x3f) == type
# XXX expr evaluated 3 times
def rb_test_class_type(expr, type)
  not_immediate = [[expr, :>, [7]], :'&&', [[expr, :&, [3]], :==, [0]]]
  flags = rb_cast_pvalue(expr, 0)
  type_match = [[flags, :&, [0x3f]], :'==', [type]]
  C::CExpression[not_immediate, :'&&', type_match]
end
712
+
713
# return a C expr equivalent to TYPE(expr) == T_ARRAY (type code 9)
def rb_test_class_ary(expr)
  t_array = 9
  rb_test_class_type(expr, t_array)
end
717
# ARY_PTR(expr): ((VALUE*)expr)[4] cast to a VALUE*
# if idx is given, returns the element at this index instead of the pointer
def rb_ary_ptr(expr, idx=nil)
  base = C::CExpression[[rb_cast_pvalue(expr, 4)], C::Pointer.new(value)]
  return base unless idx
  C::CExpression[base, :'[]', [idx]]
end
722
# ARY_LEN(expr): ((VALUE*)expr)[2]
def rb_ary_len(expr)
  len_index = 2
  rb_cast_pvalue(expr, len_index)
end
726
+
727
# TYPE(expr) == T_STRING (type code 7)
def rb_test_class_string(expr)
  t_string = 7
  rb_test_class_type(expr, t_string)
end
731
# STR_PTR(expr): ((VALUE*)expr)[3] cast to a char*
# if idx is given, returns the char at this index instead of the pointer
def rb_str_ptr(expr, idx=nil)
  base = C::CExpression[[rb_cast_pvalue(expr, 3)], C::Pointer.new(C::BaseType.new(:char))]
  return base unless idx
  C::CExpression[base, :'[]', [idx]]
end
736
# STR_LEN(expr): ((VALUE*)expr)[2]
def rb_str_len(expr)
  len_index = 2
  rb_cast_pvalue(expr, len_index)
end
740
+
741
# TYPE(expr) == type code 0xb, used to recognize a Hash
def rb_test_class_hash(expr)
  t_hash = 0xb
  rb_test_class_type(expr, t_hash)
end
744
+
745
# returns a static pointer to the constant
# the constant is read from owner (by default the scope found by resolve_const_owner)
# at compile time, and its VALUE in the current interpreter is hardcoded
# raises Fail if there is no owner
def rb_const(constname, owner = resolve_const_owner(constname))
  raise Fail, "no dynamic constant resolution #{constname}" unless owner
  resolved = owner.const_get(constname)
  addr = RubyHack.rb_obj_to_value(resolved)
  C::CExpression[[addr], value]
end
751
+
752
# compile a :masgn (parallel assignment)
# ast[1] is the [:array, lhs...] list of assignments whose value slot (last element) is nil
# ast[2] is the rhs, ast[3] the optional lhs receiving the remaining elements
# the rhs is stored in a temporary variable and read as a ruby Array, except when
# rb_masgn_optimized can be used (value not needed, rhs array literal of same length as lhs)
# returns the variable holding the rhs, or nil.object_id for the optimized version
def rb_masgn(ast, scope, want_value)
  raise Fail, "masgn with no rhs #{ast.inspect}" if not ast[2]
  raise Fail, "masgn with no lhs array #{ast.inspect}" if not ast[1] or ast[1][0] != :array
  if not want_value and ast[2][0] == :array and not ast[3] and ast[2].length == ast[1].length
    rb_masgn_optimized(ast, scope)
    return nil.object_id
  end
  full = get_new_tmp_var('masgn', want_value)
  ary = ast_to_c(ast[2], scope, full)
  scope.statements << C::CExpression[full, :'=', ary] if full != ary
  ast[1][1..-1].each_with_index { |e, i|
    raise Fail, "weird masgn lhs #{e.inspect} in #{ast.inspect}" if e[-1] != nil
    # local_42 = full[i]
    e = e.dup
    e[-1] = [:rb2cstmt, rb_ary_ptr(full, i)]
    ast_to_c(e, scope, false)
  }
  if ast[3]
    # report the offending node itself: no other variable is in scope here
    raise Fail, "weird masgn lhs #{ast[3].inspect} in #{ast.inspect}" if ast[3][-1] != nil
    # local_28 = full[12..-1].to_a
    e = ast[3].dup
    e[-1] = [:call, [:call, [:rb2cvar, full.name], '[]', [:array, [:dot2, [:lit, ast[1].length-1], [:lit, -1]]]], 'to_a']
    ast_to_c(e, scope, false)
  end

  full
end
780
+
781
# retrieve the C global named cname from the toplevel scope of @cp
def rb_global(cname)
  toplevel = @cp.toplevel
  toplevel.symbol[cname]
end
784
+
785
# compile an optimized :masgn with rhs.length == lhs.length (no need of a ruby array)
# every rhs is first stored in its own temporary variable,
# then each lhs is assigned from the temporary at the same position
def rb_masgn_optimized(ast, scope)
  tmps = ast[2][1..-1].map { |rhs|
    t = get_new_tmp_var('masgn_opt')
    r = ast_to_c(rhs, scope, t)
    scope.statements << C::CExpression[t, :'=', r] if t != r
    t
  }
  ast[1][1..-1].each_with_index { |lhs, i|
    t = tmps[i]
    asgn = lhs.dup
    raise Fail, "weird masgn lhs #{asgn.inspect} in #{ast.inspect}" if asgn[-1] != nil
    asgn[-1] = [:rb2cvar, t.name]
    ast_to_c(asgn, scope, false)
  }
end
802
+
803
# the recursive AST to C compiler
# may append C statements to scope
# returns the C::CExpr holding the VALUE of the current ruby statement
# want_value is an optional hint as to the returned VALUE is needed or not
# if want_value is a C::Variable, the statements should try to populate this var instead of some random tmp var
# eg to simplify :if encoding unless we have 'foo = if 42;..'
# returns nil when want_value is false
# raises Fail on unsupported ast nodes
def ast_to_c(ast, scope, want_value = true)
  ret =
    case ast.to_a[0]
    when :block
      if ast[1]
        # only the value of the last statement may be needed
        ast[1..-2].each { |a| ast_to_c(a, scope, false) }
        ast_to_c(ast.last, scope, want_value)
      end

    when :lvar
      local(ast[1])
    when :lasgn
      if scope == @scope
        l = local(ast[1], :none)
      else
        # w = 4 if false ; p w => should be nil
        l = local(ast[1])
      end
      st = ast_to_c(ast[2], scope, l)
      scope.statements << C::CExpression[l, :'=', st] if st != l
      l
    when :dvar
      dvar(ast[1])
    when :dasgn_curr
      l = dvar(ast[1])
      st = ast_to_c(ast[2], scope, l)
      scope.statements << C::CExpression[l, :'=', st] if st != l
      l
    when :ivar
      fcall('rb_ivar_get', rb_self, rb_intern(ast[1]))
    when :iasgn
      if want_value
        tmp = get_new_tmp_var("ivar_#{ast[1]}", want_value)
        scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)]
        scope.statements << fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), tmp)
        tmp
      else
        scope.statements << fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), ast_to_c(ast[2], scope))
      end
    when :cvar
      fcall('rb_cvar_get', rb_selfclass, rb_intern(ast[1]))
    when :cvasgn
      if want_value
        tmp = get_new_tmp_var("cvar_#{ast[1]}", want_value)
        scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)]
        scope.statements << fcall('rb_cvar_set', rb_selfclass, rb_intern(ast[1]), tmp, rb_false)
        tmp
      else
        scope.statements << fcall('rb_cvar_set', rb_selfclass, rb_intern(ast[1]), ast_to_c(ast[2], scope), rb_false)
      end
    when :gvar
      fcall('rb_gv_get', ast[1])
    when :gasgn
      if want_value
        tmp = get_new_tmp_var("gvar_#{ast[1]}", want_value)
        scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)]
        scope.statements << fcall('rb_gv_set', ast[1], tmp)
        tmp
      else
        scope.statements << fcall('rb_gv_set', ast[1], ast_to_c(ast[2], scope))
      end
    when :attrasgn	# foo.bar= 42 (same as :call, except for return value)
      recv = ast_to_c(ast[1], scope)
      raise Fail, "unsupported #{ast.inspect}" if not ast[3] or ast[3][0] != :array
      if ast[3].length != 2
        if ast[2] != '[]=' or ast[3].length != 3
          raise Fail, "unsupported #{ast.inspect}"
        end
        # foo[4] = 2
        idx = ast_to_c(ast[3][1], scope)
      end
      arg = ast_to_c(ast[3].last, scope)
      if want_value
        # the value of the statement is the assigned value, not the return value of the method
        tmp = get_new_tmp_var('call', want_value)
        scope.statements << C::CExpression[tmp, :'=', arg]
      end
      if idx
        scope.statements << rb_funcall(recv, ast[2], idx, arg)
      else
        scope.statements << rb_funcall(recv, ast[2], arg)
      end
      tmp

    when :rb2cvar	# hax, used in vararg parsing
      get_var(ast[1])
    when :rb2cstmt
      ast[1]

    when :block_arg
      local(ast[3], fcall('rb_block_proc'))

    when :lit
      case ast[1]
      when Symbol
        # XXX ID2SYM
        C::CExpression[[rb_intern(ast[1].to_s), :<<, 8], :|, 0xe]
      when Range
        # last argument of rb_range_new is exclude_end: nonzero for 'a...b' (same convention as :dot2/:dot3 below)
        fcall('rb_range_new', ast[1].begin.object_id, ast[1].end.object_id, ast[1].exclude_end? ? 1 : 0)
      else	# true/false/nil/fixnum
        ast[1].object_id
      end
    when :self
      rb_self
    when :str
      fcall('rb_str_new2', ast[1])
    when :array
      tmp = get_new_tmp_var('ary', want_value)
      scope.statements << C::CExpression[tmp, :'=', fcall('rb_ary_new')]
      ast[1..-1].each { |e|
        scope.statements << fcall('rb_ary_push', tmp, ast_to_c(e, scope))
      }
      tmp
    when :hash
      raise Fail, "bad #{ast.inspect}" if ast[1][0] != :array
      tmp = get_new_tmp_var('hash', want_value)
      scope.statements << C::CExpression[tmp, :'=', fcall('rb_hash_new')]
      # the array is a flat list key, value, key, value...
      ki = nil
      ast[1][1..-1].each { |k|
        if not ki
          ki = k
        else
          scope.statements << fcall('rb_hash_aset', tmp, ast_to_c(ki, scope), ast_to_c(k, scope))
          ki = nil
        end
      }
      tmp

    when :iter
      if v = optimize_iter(ast, scope, want_value)
        return v
      end
      # for full support of :iter, we need access to the interpreter's ruby_block private global variable in eval.c
      # we can find it by analysing rb_block_given_p, but this won't work with a static precompiled rubyhack...
      # even with access to ruby_block, there we would need to redo PUSH_BLOCK, create a temporary dvar list,
      # handle [:break, lol], and do all the stack magic reused in rb_yield (probably incl setjmp etc)
      raise Fail, "unsupported iter #{ast[3].inspect} { | #{ast[1].inspect} | #{ast[2].inspect} }"

    when :call, :vcall, :fcall
      if v = optimize_call(ast, scope, want_value)
        return v
      end
      recv = ((ast[0] == :call) ? ast_to_c(ast[1], scope) : rb_self)
      if not ast[3]
        f = rb_funcall(recv, ast[2])
      elsif ast[3][0] == :array
        args = ast[3][1..-1].map { |a| ast_to_c(a, scope) }
        f = rb_funcall(recv, ast[2], *args)
      elsif ast[3][0] == :splat
        args = ast_to_c(ast[3], scope)
        if not args.kind_of? C::Variable
          tmp = get_new_tmp_var('args', want_value)
          scope.statements << C::CExpression[tmp, :'=', args]
          args = tmp
        end
        f = fcall('rb_funcall3', recv, rb_intern(ast[2]), rb_ary_len(args), rb_ary_ptr(args))
      # elsif ast[3][0] == :argscat
      else
        raise Fail, "unsupported #{ast.inspect}"
      end
      if want_value
        tmp ||= get_new_tmp_var('call', want_value)
        scope.statements << C::CExpression[tmp, :'=', f]
        tmp
      else
        scope.statements << f
        f
      end

    when :if, :when
      if ast[0] == :when and ast[1][0] == :array
        # true if any of the conditions is
        cnd = nil
        ast[1][1..-1].map { |cd| rb_test(ast_to_c(cd, scope), scope) }.each { |cd|
          cnd = (cnd ? C::CExpression[cnd, :'||', cd] : cd)
        }
      else
        cnd = rb_test(ast_to_c(ast[1], scope), scope)
      end

      tbdy = C::Block.new(scope)
      ebdy = C::Block.new(scope) if ast[3] or want_value

      if want_value
        tmp = get_new_tmp_var('if', want_value)
        thn = ast_to_c(ast[2], tbdy, tmp)
        tbdy.statements << C::CExpression[tmp, :'=', thn] if tmp != thn
        if ast[3]
          els = ast_to_c(ast[3], ebdy, tmp)
        else
          # foo = if bar ; baz ; end => nil if !bar
          els = rb_nil
        end
        ebdy.statements << C::CExpression[tmp, :'=', els] if tmp != els
      else
        ast_to_c(ast[2], tbdy, false)
        ast_to_c(ast[3], ebdy, false)
      end

      scope.statements << C::If.new(cnd, tbdy, ebdy)

      tmp

    when :while, :until
      pib = @iter_break
      @iter_break = nil	# XXX foo = while ()...

      # infinite for(;;) loop, left with a break depending on the condition
      body = C::Block.new(scope)
      if ast[3] == 0	# do .. while();
        ast_to_c(ast[2], body, false)
      end
      t = nil
      e = C::Break.new
      t, e = e, t if ast[0] == :until
      body.statements << C::If.new(rb_test(ast_to_c(ast[1], body), body), t, e)
      if ast[3] != 0	# do .. while();
        ast_to_c(ast[2], body, false)
      end
      scope.statements << C::For.new(nil, nil, nil, body)

      @iter_break = pib
      nil.object_id

    when :and, :or, :not
      # beware lazy evaluation !
      tmp = get_new_tmp_var('and', want_value)
      v1 = ast_to_c(ast[1], scope, tmp)
      # and/or need that tmp has the actual v1 value (returned when shortcircuit)
      scope.statements << C::CExpression[tmp, :'=', v1] if v1 != tmp
      v1 = tmp
      case ast[0]
      when :and
        t = C::Block.new(scope)
        v2 = ast_to_c(ast[2], t, tmp)
        t.statements << C::CExpression[tmp, :'=', v2] if v2 != tmp
      when :or
        e = C::Block.new(scope)
        v2 = ast_to_c(ast[2], e, tmp)
        e.statements << C::CExpression[tmp, :'=', v2] if v2 != tmp
      when :not
        t = C::CExpression[tmp, :'=', rb_false]
        e = C::CExpression[tmp, :'=', rb_true]
      end
      scope.statements << C::If.new(rb_test(v1, scope), t, e)
      tmp
    when :return
      scope.statements << C::Return.new(ast_to_c(ast[1], scope))
      nil.object_id
    when :break
      if @iter_break
        # store the value of the break in the variable designated by @iter_break
        v = (ast[1] ? ast_to_c(ast[1], scope, @iter_break) : nil.object_id)
        scope.statements << C::CExpression[@iter_break, :'=', [[v], value]] if @iter_break != v
      end
      scope.statements << C::Break.new
      nil.object_id

    when nil, :args
      nil.object_id
    when :nil
      rb_nil
    when :false
      rb_false
    when :true
      rb_true
    when :const
      rb_const(ast[1])
    when :colon2
      if cst = check_const(ast[1])
        rb_const(ast[2], cst)
      else
        fcall('rb_const_get', ast_to_c(ast[1], scope), rb_intern(ast[2]))
      end
    when :colon3
      rb_const(ast[1], ::Object)
    when :defined
      case ast[1][0]
      when :ivar
        fcall('rb_ivar_defined', rb_self, rb_intern(ast[1][1]))
      else
        raise Fail, "unsupported #{ast.inspect}"
      end
    when :masgn
      # parallel assignment: put everything in an Array, then pop everything back?
      rb_masgn(ast, scope, want_value)

    when :evstr
      fcall('rb_obj_as_string', ast_to_c(ast[1], scope))
    when :dot2, :dot3
      fcall('rb_range_new', ast_to_c(ast[1], scope), ast_to_c(ast[2], scope), ast[0] == :dot2 ? 0 : 1)
    when :splat
      fcall('rb_Array', ast_to_c(ast[1], scope))
    when :to_ary
      fcall('rb_ary_to_ary', ast_to_c(ast[1], scope))
    when :dstr
      # dynamic string: "foo#{bar}baz"
      tmp = get_new_tmp_var('dstr')
      scope.statements << C::CExpression[tmp, :'=', fcall('rb_str_new2', ast[1][1])]
      ast[2..-1].compact.each { |s|
        if s[0] == :str	# directly append the char*
          scope.statements << fcall('rb_str_cat2', tmp, s[1])
        else
          scope.statements << fcall('rb_str_append', tmp, ast_to_c(s, scope))
        end
      }
      tmp
    when :case
      compile_case(ast, scope, want_value)
    when :ensure
      # TODO
      ret = ast_to_c(ast[1], scope, want_value)
      ast_to_c(ast[3], scope, false)
      ret
    else
      raise Fail, "unsupported #{ast.inspect}"
    end

  if want_value
    # raw integers/strings are cast to VALUE
    ret = C::CExpression[[ret], value] if ret.kind_of? Integer or ret.kind_of? String
    ret
  end
end
1128
+
1129
# optional optimization of a call (eg a == 1, c+2, ...)
# return nil for normal rb_funcall, or a C::CExpr to use as retval.
# handled here: comparison/addition/subtraction with a Fixnum literal, == with a Symbol literal,
# <<, [] with one argument, empty?, pop, kind_of? with some constants,
# and calls on self (forwarded to optimize_call_static)
def optimize_call(ast, scope, want_value)
  cexp = C::CExpression
  op = ast[2]
  sint = C::BaseType.new(:ptr)	# signed VALUE
  callargs = ast[3][1..-1] if ast[3] and ast[3][0] == :array
  first = callargs[0] if callargs and callargs[0]

  if first and first[0] == :lit and first[1].kind_of? Fixnum
    # optimize 'x==42', 'x+42', 'x-42'
    lit = first[1]
    return unless %w[== > < >= <= + -].include? op
    if lit < 0 and ['+', '-'].include? op
      # need lit >= 0 for overflow detection
      op = {'+' => '-', '-' => '+'}[op]
      lit = -lit
      return unless lit.kind_of? Fixnum	# -0x40000000
    end

    lit_v = lit.object_id
    rcv = ast_to_c(ast[1], scope)
    out = get_new_tmp_var('opt', want_value)
    unless rcv.kind_of? C::Variable
      scope.statements << cexp[out, :'=', rcv]
      rcv = out
    end

    case op
    when '=='
      # XXX assume == only return true for full equality: if not Fixnum, then always false
      # which breaks 1.0 == 1 and maybe others, but its ok
      scope.statements << C::If.new(cexp[rcv, :'==', [lit_v]], cexp[out, :'=', rb_true], cexp[out, :'=', rb_false])
    when '>', '<', '>=', '<='
      # do the actual comparison on signed >>1 if both Fixnum
      fast = C::If.new(
        cexp[[[[rcv], sint], :>>, [1]], op.to_sym, [[[lit_v], sint], :>>, [1]]],
        cexp[out, :'=', rb_true],
        cexp[out, :'=', rb_false])
      # fallback to actual rb_funcall
      slow = cexp[out, :'=', rb_funcall(rcv, op, lit.object_id)]
      add_optimized_statement scope, ast[1], rcv, 'fixnum' => fast, 'other' => slow
    when '+'
      sum = cexp[rcv, :+, [lit_v-1]]	# overflow to Bignum ?
      in_range = cexp[[rcv, :&, [1]], :'&&', [[[rcv], sint], :<, [[sum], sint]]]
      fast = cexp[out, :'=', sum]
      slow = cexp[out, :'=', rb_funcall(rcv, op, lit.object_id)]
      if @optim_hint[ast[1]] == 'fixnum'
        # add_optimized_statement wont handle the overflow check correctly
        scope.statements << fast
      else
        scope.statements << C::If.new(in_range, fast, slow)
      end
    when '-'
      diff = cexp[rcv, :-, [lit_v-1]]
      in_range = cexp[[rcv, :&, [1]], :'&&', [[[rcv], sint], :>, [[diff], sint]]]
      fast = cexp[out, :'=', diff]
      slow = cexp[out, :'=', rb_funcall(rcv, op, lit.object_id)]
      if @optim_hint[ast[1]] == 'fixnum'
        scope.statements << fast
      else
        scope.statements << C::If.new(in_range, fast, slow)
      end
    end
    out

  # Symbol#==
  elsif first and first[0] == :lit and first[1].kind_of? Symbol and op == '=='
    sym_v = ast_to_c(first, scope)
    out = get_new_tmp_var('opt', want_value)
    rcv = ast_to_c(ast[1], scope, out)
    unless rcv.kind_of? C::Variable
      scope.statements << cexp[out, :'=', rcv]
      rcv = out
    end

    scope.statements << C::If.new(cexp[rcv, :'==', [sym_v]], cexp[out, :'=', rb_true], cexp[out, :'=', rb_false])
    out

  elsif first and op == '<<'
    out = get_new_tmp_var('opt', want_value)
    rcv = ast_to_c(ast[1], scope, out)
    pushed = ast_to_c(first, scope)
    if rcv != out
      scope.statements << cexp[out, :'=', rcv]
      rcv = out
    end

    on_ary = fcall('rb_ary_push', rcv, pushed)
    on_str = fcall('rb_str_concat', rcv, pushed)
    generic = rb_funcall(rcv, op, pushed)
    generic = cexp[out, :'=', generic] if want_value

    add_optimized_statement scope, ast[1], rcv, 'ary' => on_ary, 'string' => on_str, 'other' => generic
    out

  elsif first and callargs.length == 1 and op == '[]'
    return if ast[1][0] == :const	# Expression[42]
    out = get_new_tmp_var('opt', want_value)
    rcv = ast_to_c(ast[1], scope, out)
    unless rcv.kind_of? C::Variable
      scope.statements << cexp[out, :'=', rcv]
      rcv = out
    end

    idx = get_new_tmp_var('idx')
    key = ast_to_c(first, scope, idx)
    unless key.kind_of? C::Variable
      scope.statements << cexp[idx, :'=', key]
      key = idx
    end
    # from now on idx is used as a signed integer
    idx = cexp[[idx], sint]

    # Array: decode the Fixnum index, handle negative index, check bounds
    on_ary = C::Block.new(scope)
    on_ary.statements << cexp[idx, :'=', [[[key], sint], :>>, [1]]]
    on_ary.statements << C::If.new(cexp[idx, :<, [0]], cexp[idx, :'=', [idx, :+, rb_ary_len(rcv)]], nil)
    on_ary.statements << C::If.new(cexp[[idx, :<, [0]], :'||', [idx, :>=, [[rb_ary_len(rcv)], sint]]],
        cexp[out, :'=', rb_nil],
        cexp[out, :'=', rb_ary_ptr(rcv, idx)])
    # String: same, the char is returned as a Fixnum
    on_str = C::Block.new(scope)
    on_str.statements << cexp[idx, :'=', [[[key], sint], :>>, [1]]]
    on_str.statements << C::If.new(cexp[idx, :<, [0]], cexp[idx, :'=', [idx, :+, rb_str_len(rcv)]], nil)
    on_str.statements << C::If.new(cexp[[idx, :<, [0]], :'||', [idx, :>=, [[rb_str_len(rcv)], sint]]],
        cexp[out, :'=', rb_nil],
        cexp[out, :'=', [[[[rb_str_ptr(rcv, idx), :&, [0xff]], :<<, [1]], :|, [1]], value]])
    on_hash = cexp[out, :'=', fcall('rb_hash_aref', rcv, key)]
    generic = cexp[out, :'=', rb_funcall(rcv, op, key)]

    # ary/string only valid with fixnum argument !
    add_optimized_statement scope, ast[1], rcv, 'hash' => on_hash, 'other' => generic,
      'ary_bnd' => cexp[out, :'=', rb_ary_ptr(rcv, cexp[[[key], sint], :>>, [1]])],
      cexp[[key, :&, 1], :'&&', rb_test_class_ary(rcv)] => on_ary,
      cexp[[key, :&, 1], :'&&', rb_test_class_string(rcv)] => on_str
    out

  elsif ast[1] and not first and op == 'empty?'
    out = get_new_tmp_var('opt', want_value)
    rcv = ast_to_c(ast[1], scope, out)
    unless rcv.kind_of? C::Variable
      scope.statements << cexp[out, :'=', rcv]
      rcv = out
    end

    on_ary = C::If.new(rb_ary_len(rcv), cexp[out, :'=', rb_false], cexp[out, :'=', rb_true])

    add_optimized_statement scope, ast[1], rcv, 'ary' => on_ary,
      'other' => cexp[out, :'=', rb_funcall(rcv, op)]
    out

  elsif ast[1] and not first and op == 'pop'
    out = get_new_tmp_var('opt', want_value)
    rcv = ast_to_c(ast[1], scope, out)
    unless rcv.kind_of? C::Variable
      scope.statements << cexp[out, :'=', rcv]
      rcv = out
    end

    on_ary = fcall('rb_ary_pop', rcv)
    generic = rb_funcall(rcv, op)
    if want_value
      on_ary = cexp[out, :'=', on_ary]
      generic = cexp[out, :'=', generic]
    end

    add_optimized_statement scope, ast[1], rcv, 'ary' => on_ary, 'other' => generic

    out

  elsif ast[1] and op == 'kind_of?' and first and (first[0] == :const or first[0] == :colon3)
    # TODO check const maps to toplevel when :const
    check =
      case first[1]
      when 'Symbol'
        out = get_new_tmp_var('kindof', want_value)
        cexp[[ast_to_c(ast[1], scope, out), :'&', [0xf]], :'==', [0xe]]
      #when 'Numeric', 'Integer'
      when 'Fixnum'
        out = get_new_tmp_var('kindof', want_value)
        cexp[ast_to_c(ast[1], scope, out), :'&', [0x1]]
      when 'Array'
        rb_test_class_ary(ast_to_c(ast[1], scope))
      when 'String'
        rb_test_class_string(ast_to_c(ast[1], scope))
      else return
      end
    puts "shortcut may be incorrect for #{ast.inspect}" if first[0] == :const
    out ||= get_new_tmp_var('kindof', want_value)
    scope.statements << C::If.new(check, cexp[out, :'=', rb_true], cexp[out, :'=', rb_false])
    out

  elsif not ast[1] or ast[1] == [:self]
    optimize_call_static(ast, scope, want_value)
  end
end
1323
+
1324
# check if the var falls in an optim_hint, if so generate only selected code
# optim is a hash varclass (keyof @optim_hint) => c_stmt
# optim key can also be a C::Stmt that is used in the If clause
# if optim['ary'] == optim['ary_bnd'], you can omit the latter
# must have an 'other' key that calls the generic ruby method
# without hint for varid, and if all keys of optim are Strings, a cascade of runtime type tests is generated
# in all other cases where the hint has no entry in optim, the 'other' statement alone is used
# the selected statement is appended to scope
def add_optimized_statement(scope, varid, varc, optim={})
  hint = @optim_hint[varid]
  hint = 'ary' if hint == 'ary_bnd' and not optim['ary_bnd']
  stmt = optim[hint]
  unless stmt
    stmt = optim['other']
    if not hint and optim.keys.all? { |k| k.kind_of? String }
      # no need to cascade if we have a hash and can optim ary only
      optim.each { |key, code|
        case key
        when 'ary'
          stmt = C::If.new(rb_test_class_ary(varc), code, stmt)
        when 'hash'
          stmt = C::If.new(rb_test_class_hash(varc), code, stmt)
        when 'string'
          stmt = C::If.new(rb_test_class_string(varc), code, stmt)
        when 'other'
          # already done as default case
        when 'fixnum'
          # add test last
        when C::Statement
          stmt = C::If.new(key, code, stmt)
        end
      }
      fixnum_code = optim['fixnum']
      if fixnum_code
        # first test to perform (fast path)
        stmt = C::If.new(C::CExpression[varc, :&, 1], fixnum_code, stmt)
      end
    end
  end
  scope.statements << stmt
end
1354
+
1355
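+ # returns ptr, or nil if klass#method is not implemented by a C function (the arity is only used to build the prototype)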
1356
+ # ptr is a CExpr pointing to the C func implementing klass#method
1357
# Looks up klass#method (or the singleton method when +singleton+ is true)
# through RubyHack and, if its node type is :cfunc, returns a C expression
# casting the node's :fptr to a function pointer whose prototype is built
# from the node's :arity.
# Returns nil when the node pointer is 0 or the node is not a :cfunc.
def get_cfuncptr(klass, method, singleton=false)
  owner = singleton ? (class << klass ; self ; end) : klass
  node = RubyHack.get_method_node_ptr(owner, method)
  return if node == 0

  # the node type is stored in bits 11..18 of the first word of the node
  kind = RubyHack::NODETYPE[(RubyHack.memory_read_int(node) >> 11) & 0xff]
  return if kind != :cfunc

  info = RubyHack.read_node(node)
  arity = info[1][:arity]
  fptr = info[1][:fptr]

  proto = C::Function.new(value, [])
  if arity == -1
    # (int, VALUE*, VALUE)
    proto.args << C::Variable.new(nil, C::BaseType.new(:int))
    proto.args << C::Variable.new(nil, C::Pointer.new(value))
    proto.args << C::Variable.new(nil, value)
  elsif arity == -2
    # (VALUE, VALUE)
    proto.args << C::Variable.new(nil, value)
    proto.args << C::Variable.new(nil, value)
  else
    # one VALUE per argument, plus one more (the call sites pass rb_self first)
    (arity+1).times { proto.args << C::Variable.new(nil, value) }
  end

  C::CExpression[[fptr], C::Pointer.new(proto)]
end
1376
+
1377
+ # call C funcs directly
1378
+ # assume private function calls are not virtual and hardlink them here
1379
# Compiles a receiver-less / self method call into a direct call through a
# C function pointer.
#  ast: the call node; ast[2] is the method name, ast[3] the argument node
#  scope: block receiving the generated statements
#  want_value: truthy if the caller needs the call's result
# Returns nil when the call cannot be handled here (arity lookup raised, the
# target is not a C function, unsupported argument node), the temporary
# holding the result when want_value is set, scope.statements otherwise.
def optimize_call_static(ast, scope, want_value)
  begin
    arity = method_arity(ast[2])
  rescue
    return
  end

  if ast[2].to_s == @meth.to_s
    # self is recursive
    fptr = @cur_cfunc
  elsif not fptr = get_cfuncptr(@klass, ast[2], @meth_singleton)
    return
  end

  c_arglist = []
  arg_node = ast[3]
  # set once the splat branch has already filled c_arglist
  arglist_done = false

  if not arg_node
    args = []
  elsif arg_node[0] == :array
    args = arg_node[1..-1]
  elsif arg_node[0] == :splat
    args = ast_to_c(arg_node, scope)
    if arity != -2 and !args.kind_of?(C::Variable)
      # the array expression is used several times below: store it in a temporary
      holder = get_new_tmp_var('arg')
      scope.statements << C::CExpression[holder, :'=', args]
      args = holder
    end

    if arity == -2
      c_arglist << rb_self << args
    elsif arity == -1
      c_arglist << [rb_ary_len(args)] << rb_ary_ptr(args) << rb_self
    else
      # fixed arity: the splatted array length is checked by the generated code
      mismatch = C::CExpression[rb_ary_len(args), :'!=', [arity]]
      scope.statements << C::If.new(mismatch, rb_raise("#{arity} args expected", 'rb_eArgumentError'), nil)

      c_arglist << rb_self
      arity.times { |i| c_arglist << rb_ary_ptr(args, i) }
    end
    arglist_done = true
  else
    return	# TODO
  end

  if arglist_done
    # nothing left to do
  elsif arity == -2
    # (self, args_ary): pack the arguments in a fresh ruby array
    packed = get_new_tmp_var('arg')
    scope.statements << C::CExpression[packed, :'=', fcall('rb_ary_new')]
    args.each { |a|
      scope.statements << fcall('rb_ary_push', packed, ast_to_c(a, scope))
    }
    c_arglist << rb_self << packed
  elsif arity == -1
    # (argc, argv, self)
    if args.length == 0
      argv = C::CExpression[[0], C::Pointer.new(value)]
    elsif args.length == 1
      single = ast_to_c(args[0], scope)
      unless single.kind_of? C::Variable
        # the address of the value is taken below: it must live in a variable
        argv = get_new_tmp_var('argv')
        scope.statements << C::CExpression[argv, :'=', single]
        single = argv
      end
      argv = C::CExpression[:'&', single]
    else
      argv = get_new_tmp_var('argv')
      argv.type = C::Array.new(value, args.length)
      args.each_with_index { |a, i|
        elem = ast_to_c(a, scope)
        scope.statements << C::CExpression[[argv, :'[]', [i]], :'=', elem]
      }
    end
    c_arglist << [args.length] << argv << rb_self
  else
    # (self, arg0, arg1, ...): every argument goes through its own temporary
    c_arglist << rb_self
    args.each { |a|
      slot = get_new_tmp_var('arg')
      computed = ast_to_c(a, scope, slot)
      scope.statements << C::CExpression[slot, :'=', computed] if computed != slot
      c_arglist << slot
    }
  end

  call = C::CExpression[fptr, :funcall, c_arglist]
  return(scope.statements << call) unless want_value

  ret = get_new_tmp_var('ccall', want_value)
  scope.statements << C::CExpression[ret, :'=', call]
  ret
end
1469
+
1470
# Compiles a few common iterator calls into plain C loops:
#   loop { }, int.times { |i| }, ary.each / ary.find / ary.map { |e| }
# (ary.reverse_each and hash.each_key are first rewritten as
# ary.reverse.each and hash.keys.each).
#  ast: the iter node; ast[1] is the block argument node, ast[2] the block
#       body, ast[3] the call the block is attached to
#  scope: block receiving the generated statements
#  want_value: truthy if the caller needs the value of the iteration
# Returns nil if the construct is not handled, otherwise the temporary holding
# the iteration value when want_value is set, or nil.object_id.
# While the block body is compiled, @iter_break designates that temporary (nil
# if no value is wanted); the previous @iter_break is restored on exit, even
# when compiling the body raises.
def optimize_iter(ast, scope, want_value)
  old_ib = @iter_break
  b_args, b_body, b_recv = ast[1, 3]

  # a new tmpvar, so we can overwrite it in 'break :foo'
  @iter_break = want_value ? get_new_tmp_var('iterbreak') : nil

  if b_recv[0] == :call and b_recv[2] == 'reverse_each'
    # convert ary.reverse_each to ary.reverse.each
    b_recv = b_recv.dup
    b_recv[1] = [:call, b_recv[1], 'reverse']
    b_recv[2] = 'each'
  elsif b_recv[0] == :call and b_recv[2] == 'each_key'
    # convert hash.each_key to hash.keys.each
    b_recv = b_recv.dup
    b_recv[1] = [:call, b_recv[1], 'keys']
    b_recv[2] = 'each'
  end

  # method call without arguments
  plain_call = (b_recv[0] == :call && !b_recv[3])
  # ... whose block takes a single block-local variable
  ary_block = (plain_call && b_args && b_args[0] == :dasgn_curr)

  # loop { }
  if b_recv[0] == :fcall and b_recv[2] == 'loop'
    body = C::Block.new(scope)
    ast_to_c(b_body, body, false)
    scope.statements << C::For.new(nil, nil, nil, body)

  # int.times { |i| }
  elsif plain_call and b_recv[2] == 'times'
    limit = get_new_tmp_var('limit')
    recv = ast_to_c(b_recv[1], scope, limit)
    scope.statements << C::If.new(C::CExpression[:'!', [recv, :&, 1]], rb_raise('only Fixnum#times handled'), nil)
    # default value of the iteration: the receiver
    scope.statements << C::CExpression[@iter_break, :'=', recv] if want_value
    scope.statements << C::CExpression[limit, :'=', [recv, :>>, 1]]
    cntr = get_new_tmp_var('cntr')
    cntr.type = C::BaseType.new(:int, :unsigned)
    body = C::Block.new(scope)
    if b_args and b_args[0] == :dasgn_curr
      # block variable = (cntr << 1) | 1
      body.statements << C::CExpression[dvar(b_args[1]), :'=', [[cntr, :<<, 1], :|, 1]]
    end
    ast_to_c(b_body, body, false)
    optimize_iter_emit_for(scope, cntr, limit, body)

  # ary.each { |e| }
  elsif ary_block and b_recv[2] == 'each'
    ary, cntr, body = optimize_iter_ary_setup(b_recv, b_args, scope, 'each', want_value) { |a| a }
    ast_to_c(b_body, body, false)
    optimize_iter_emit_for(scope, cntr, rb_ary_len(ary), body)

  # ary.find { |e| }
  elsif ary_block and b_recv[2] == 'find'
    ary, cntr, body = optimize_iter_ary_setup(b_recv, b_args, scope, 'find', want_value) { rb_nil }
    # same as #each up to this point (except default retval), now add a 'if (body_value) break ary[cntr];'
    # XXX 'find { next true }'
    found = ast_to_c(b_body, body)
    hit = C::Block.new(body)
    # in void context there is no @iter_break to store the element into: only break
    hit.statements << C::CExpression[@iter_break, :'=', rb_ary_ptr(ary, cntr)] if want_value
    hit.statements << C::Break.new
    body.statements << C::If.new(rb_test(found, body), hit, nil)
    optimize_iter_emit_for(scope, cntr, rb_ary_len(ary), body)

  # ary.map { |e| }
  elsif ary_block and b_recv[2] == 'map'
    ary, cntr, body = optimize_iter_ary_setup(b_recv, b_args, scope, 'map', want_value) { fcall('rb_ary_new') }
    # same as #each up to this point (except default retval), now add a '@iter_break << body_value'
    # XXX 'next' unhandled
    if want_value
      val = ast_to_c(b_body, body)
      body.statements << fcall('rb_ary_push', @iter_break, val)
    else
      # in void context there is no result array: run the body for its side effects only
      ast_to_c(b_body, body, false)
    end
    optimize_iter_emit_for(scope, cntr, rb_ary_len(ary), body)

  else
    return
  end

  @iter_break || nil.object_id
ensure
  @iter_break = old_ib
end

# Common prologue of the Array#each/find/map shortcuts: stores the receiver in
# a temporary, emits the generated raise for non-Array receivers, initialises
# @iter_break with the value returned by the given block (called with the
# array temporary, only if want_value is set) and prepares the loop body with
# the block variable set to ary[cntr].
# Returns [ary, cntr, body].
def optimize_iter_ary_setup(b_recv, b_args, scope, meth, want_value)
  ary = get_new_tmp_var('ary')
  recv = ast_to_c(b_recv[1], scope, ary)
  scope.statements << C::CExpression[ary, :'=', recv] if ary != recv
  scope.statements << C::If.new(rb_test_class_ary(ary), nil, rb_raise("only Array##{meth} { |e| } handled"))
  scope.statements << C::CExpression[@iter_break, :'=', yield(ary)] if want_value
  cntr = get_new_tmp_var('cntr')
  cntr.type = C::BaseType.new(:int, :unsigned)
  body = C::Block.new(scope)
  body.statements << C::CExpression[dvar(b_args[1]), :'=', [rb_ary_ptr(ary), :'[]', [cntr]]]
  [ary, cntr, body]
end
private :optimize_iter_ary_setup

# Appends 'for (cntr = 0; cntr < limit; ++cntr) body' to scope.statements.
def optimize_iter_emit_for(scope, cntr, limit, body)
  scope.statements << C::For.new(C::CExpression[cntr, :'=', [[0], cntr.type]], C::CExpression[cntr, :<, limit], C::CExpression[:'++', cntr], body)
end
private :optimize_iter_emit_for
1596
+ end
1597
+
1598
+ # a ruby2c C generator for use in any ruby interpreter (generates C suitable for use as a standard Ruby extension)
1599
# ruby2c generator producing a standalone C source: the compiled methods are
# registered from a generated 'do_init_once' function, itself called by the
# exported 'Init_compiledruby' entry point.
class RubyStaticCompiler < RubyLiveCompiler
  # add new ruby functions (instance methods of klass) to the class-wide
  # compiler instance @rcp, created on first use
  # returns self (the class)
  def self.compile(klass, *methlist)
    @rcp ||= new
    methlist.each { |meth|
      ast = RubyHack.read_method_ast(klass, meth)
      @rcp.compile(ast, klass, meth)
    }
    self
  end

  # same as compile, for singleton methods of klass
  def self.compile_singleton(klass, *methlist)
    @rcp ||= new
    methlist.each { |meth|
      ast = RubyHack.read_singleton_method_ast(klass, meth)
      @rcp.compile(ast, klass, meth, true)
    }
    self
  end

  # returns the C source accumulated by the class-wide compiler instance,
  # prefixed with a fixed preamble
  # one of the compile methods must have been called first (@rcp is set there)
  def self.dump
    <<EOS + @rcp.cp.dump_definition('Init_compiledruby')
#ifdef __ELF__
asm .pt_gnu_stack rw;
#endif
EOS
  end

  # returns the C source of this instance: the definition of m and of
  # do_init_once, or the whole C parser content if m is nil/false
  def dump(m="Init_compiledruby")
    m ? @cp.dump_definition(m, 'do_init_once') : @cp.to_s
  end

  # cp is handed to the superclass initializer; the init function skeletons
  # are then parsed into @cp
  def initialize(cp=nil)
    super(cp)

    @cp.parse <<EOS
// static VALUE method(VALUE self, VALUE arg0, VALUE arg1) { return (VALUE)0; }
// static VALUE const_Lol;
static void do_init_once(void) {
// const_Lol = rb_const_get(*rb_cObject, rb_intern("Lol"));
// rb_define_method(const_Lol, "method", method, 2);
}

int Init_compiledruby(void) __attribute__((export)) {
// use a separate func to avoid having to append statements before the 'return'
do_init_once();
return 0;
}
EOS
  end

  # returns the 'do_init_once' function body
  def init
    @cp.toplevel.symbol['do_init_once'].initializer
  end

  # compiles one method through the superclass, remembers the resulting C
  # function and appends its rb_define_(singleton_)method registration to
  # do_init_once
  # returns what the superclass compile returned, or nil if that was nil/false
  def compile(ast, klass, method, singleton=false)
    @compiled_func_cache ||= {}

    mname = super(ast, klass, method, singleton)
    return if not mname

    @compiled_func_cache[[klass, method.to_s, singleton]] = @cur_cfunc

    cls = rb_const(nil, klass)

    init.statements << fcall("rb_define#{'_singleton' if singleton}_method", cls, method.to_s, @cur_cfunc, method_arity)

    mname
  end

  # declares a new static toplevel C variable, inserted before the first
  # function definition of the toplevel (appended near the end if there is none)
  # if initializer is given, 'var = initializer' is added to do_init_once
  # returns the new C::Variable
  def declare_newtopvar(name, initializer, type=value)
    v = C::Variable.new(name, type)
    v.storage = :static
    @cp.toplevel.symbol[v.name] = v

    stmts = @cp.toplevel.statements
    first_func = stmts.find { |st|
      st.kind_of?(C::Declaration) && st.var.type.kind_of?(C::Function) && st.var.initializer
    }
    # the -1 fallback must apply to the position, not to the argument of #index
    # (otherwise a toplevel without function body yields a nil position)
    pos = first_func ? stmts.index(first_func) : -1
    stmts.insert pos, C::Declaration.new(v)

    if initializer
      pos = -1
      if name =~ /^intern_/
        # intern_* assignments are kept at the head of do_init_once, sorted by
        # variable name: count the leading assignments whose name sorts before
        pos = 0
        init.statements.each { |st|
          break unless st.kind_of?(C::CExpression) && st.op == :'=' && st.lexpr.kind_of?(C::Variable) && st.lexpr.name < name
          pos += 1
        }
      end
      init.statements.insert(pos, C::CExpression[v, :'=', initializer])
    end

    v
  end

  # returns the static C variable 'intern_<sym>' (name passed through
  # escape_varname), declaring it with an rb_intern(sym) initializer on first use
  def rb_intern(sym)
    n = escape_varname("intern_#{sym}")
    @cp.toplevel.symbol[n] || declare_newtopvar(n, fcall('rb_intern', sym.to_s), C::BaseType.new(:int, :unsigned))
  end

  # rb_const 'FOO', Bar::Baz ==>
  #  const_Bar = rb_const_get(rb_cObject, rb_intern("Bar"));
  #  const_Bar_Baz = rb_const_get(const_Bar, rb_intern("Baz"));
  #  const_Bar_Baz_FOO = rb_const_get(const_Bar_Baz, rb_intern("FOO"));
  # use rb_const(nil, class) to get a pointer to a class/module
  # raises Fail if no owner is given/resolved
  def rb_const(constname, owner = resolve_const_owner(constname))
    raise Fail, "no dynamic constant resolution #{constname}" if not owner

    # cache [parent module, constant name] => C expression holding the value
    @const_value ||= { [::Object, 'Object'] => rb_global('rb_cObject') }

    k = ::Object
    v = nil
    cname = owner.name
    cname += '::' + constname if constname
    cname.split('::').each { |n|
      kk = k.const_get(n)
      if not v = @const_value[[k, n]]
        # class A ; end ; B = A => B.name => 'A'
        vn = "const_#{escape_varname((k.name + '::' + n).sub(/^Object::/, '').gsub('::', '_'))}"
        vi = fcall('rb_const_get', rb_const(nil, k), fcall('rb_intern', n))
        v = declare_newtopvar(vn, vi)
        # n wont be reused, so do not alloc a global intern_#{n} for this
        @const_value[[k, n]] = v
      end
      k = kk
    }
    v
  end

  # returns the expression '*cname', with cname looked up in the toplevel symbols
  # TODO remove this when the C compiler is fixed
  def rb_global(cname)
    C::CExpression[:*, @cp.toplevel.symbol[cname]]
  end

  # returns a C expression/variable for the function implementing
  # klass#method: either a function compiled by this instance, or a static
  # function pointer variable resolved in do_init_once
  # returns nil if the method is not a C function in the current interpreter
  def get_cfuncptr(klass, method, singleton=false)
    # may be called before the first #compile
    @compiled_func_cache ||= {}

    # is it a func we have in the current cparser ?
    if ptr = @compiled_func_cache[[klass, method.to_s, singleton]]
      return ptr
    end

    # check if it's a C or ruby func in the current interpreter
    cls = singleton ? (class << klass ; self ; end) : klass
    ptr = RubyHack.get_method_node_ptr(cls, method)
    return if ptr == 0
    ftype = RubyHack::NODETYPE[(RubyHack.memory_read_int(ptr) >> 11) & 0xff]
    return if ftype != :cfunc

    # ok, so assume it will be the same next time
    n = escape_varname("fptr_#{klass.name}#{singleton ? '.' : '#'}#{method}".gsub('::', '_'))
    if not v = @cp.toplevel.symbol[n]
      v = get_cfuncptr_dyn(klass, method, singleton, n)
    end

    v
  end

  # declares the static function pointer variable n, and appends to
  # do_init_once the code that fetches the method node through rb_method_node,
  # raises unless it is a cfunc, and stores word 1 of the node (cast to the
  # function pointer type) in n
  # returns the new variable
  def get_cfuncptr_dyn(klass, method, singleton, n)
    arity = singleton ? klass.method(method).arity : klass.instance_method(method).arity
    fproto = C::Function.new(value, [])
    case arity
    when -1; fproto.args << C::Variable.new(nil, C::BaseType.new(:int)) << C::Variable.new(nil, C::Pointer.new(value)) << C::Variable.new(nil, value)
    when -2; fproto.args << C::Variable.new(nil, value) << C::Variable.new(nil, value)
    else (arity+1).times { fproto.args << C::Variable.new(nil, value) }
    end

    # local 'int *ptr' of do_init_once, shared by all the lookups
    if not ptr = init.symbol['ptr']
      ptr = C::Variable.new('ptr', C::Pointer.new(C::BaseType.new(:int)))
      init.symbol[ptr.name] = ptr
      init.statements << C::Declaration.new(ptr)
    end

    cls = rb_const(nil, klass)
    cls = fcall('rb_singleton_class', cls) if singleton
    init.statements << C::CExpression[ptr, :'=', fcall('rb_method_node', cls, rb_intern(method))]

    # dynamically recheck that klass#method is a :cfunc
    cnd = C::CExpression[[:'!', ptr], :'||', [[[[ptr, :'[]', [0]], :>>, [11]], :&, [0xff]], :'!=', [RubyHack::NODETYPE.index(:cfunc)]]]
    init.statements << C::If.new(cnd, rb_raise("CFunc expected at #{klass}#{singleton ? '.' : '#'}#{method}"), nil)

    vi = C::CExpression[[ptr, :'[]', [1]], C::Pointer.new(fproto)]
    declare_newtopvar(n, vi, C::Pointer.new(fproto))
  end

  if defined?($trace_rbfuncall) && $trace_rbfuncall
    # dynamic trace of all rb_funcall made from our module
    # every rb_funcall site gets its own counter in rb_fcntr[], incremented
    # right before the call; rb_fcstat (registered with atexit) prints the
    # non-zero counters
    def rb_funcall(recv, meth, *args)
      if not defined? @rb_fcid
        @cp.parse <<EOS
int atexit(void(*)(void));
int printf(char*, ...);

static unsigned rb_fcid_max = 1;
static unsigned rb_fcntr[1];

static void rb_fcstat(void)
{
unsigned i;
for (i=0 ; i<rb_fcid_max ; ++i)
if (rb_fcntr[i])
printf("%u %u\\n", i, rb_fcntr[i]);
}
EOS
        @rb_fcid = -1
        @rb_fcntr = @cp.toplevel.symbol['rb_fcntr']
        @rb_fcid_max = @cp.toplevel.symbol['rb_fcid_max']
        init.statements << fcall('atexit', @cp.toplevel.symbol['rb_fcstat'])
      end
      # new call site: grow the counter array and its recorded size
      @rb_fcid += 1
      @rb_fcid_max.initializer = C::CExpression[[@rb_fcid+1], @rb_fcid_max.type]
      @rb_fcntr.type.length = @rb_fcid+1

      ctr = C::CExpression[:'++', [@rb_fcntr, :'[]', [@rb_fcid]]]
      C::CExpression[ctr, :',', super(recv, meth, *args)]
    end
  end
end
1815
+ end
1816
+
1817
+
1818
+
1819
+
1820
# demo / command-line driver, run when the file is executed directly or when
# 'ignore_argv0' is found in (and removed from) ARGV
if __FILE__ == $0 || ARGV.delete('ignore_argv0')

# the first command-line argument selects the demo
demo = {
  nil => :test_jit,
  'asm' => :inlineasm,
  'generate' => :generate_persistent
}.fetch(ARGV.first, :compile_ruby)


case demo
when :inlineasm
  # cnt.times { sys_write str }
  src_asm = <<EOS
mov ecx, [ebp+8]
again:
push ecx

mov eax, 4
mov ebx, 1
mov ecx, [ebp+12]
mov edx, [ebp+16]
int 80h

pop ecx
loop again
EOS

  src = <<EOS
#{Metasm::RubyLiveCompiler::RUBY_H}

void doit(int, char*, int);
VALUE foo(VALUE self, VALUE count, VALUE str) {
doit(VAL2INT(count), STR_PTR(str), STR_LEN(str));
return count;
}

void doit(int count, char *str, int strlen) { asm(#{src_asm.inspect}); }
EOS

  class Foo
  end

  # install the compiled C function as Foo#bar, with 2 arguments
  binary = Metasm::RubyHack.compile_c(src).encoded
  Metasm::RubyHack.set_method_binary(Foo, 'bar', binary, 2)

  Foo.new.bar(4, "blabla\n")
  Foo.new.bar(2, "foo\n")


when :compile_ruby
  abort 'need <class#method>' if ARGV.empty?
  require 'pp'
  puts '#if 0'
  ARGV.each { |av|
    # Class#method for an instance method, Class.method for a singleton method
    next unless av =~ /^(.*)([.#])(.*)$/
    path, sep, meth = $1, $2, $3.to_sym
    singleton = (sep == '.')
    cls = path.split('::').inject(::Object) { |o, cst| o.const_get(cst) }

    if singleton
      ast = Metasm::RubyHack.read_singleton_method_ast(cls, meth)
      cls.method(meth) unless ast	# raise NoMethodError
    else
      ast = Metasm::RubyHack.read_method_ast(cls, meth)
      cls.instance_method(meth) unless ast
    end

    puts ' --- ast ---'
    pp ast

    if singleton
      Metasm::RubyStaticCompiler.compile_singleton(cls, meth)
    else
      Metasm::RubyStaticCompiler.compile(cls, meth)
    end
  }
  puts '', ' --- C ---', '#endif'
  puts Metasm::RubyStaticCompiler.dump


when :test_jit
  class Foo
    def bla(x=500)
      i = 0
      x.times { i += 16 }
      i
    end
  end

  # time the compilation, then a run of the compiled method
  t0 = Time.now
  Metasm::RubyLiveCompiler.compile(Foo, :bla)
  t1 = Time.now
  ret = Foo.new.bla(0x401_0000)
  puts ret.to_s(16), ret.class
  t2 = Time.now

  puts "compile %.3fs  run %.3fs" % [t1-t0, t2-t1]

when :generate_persistent
  Metasm::RubyStaticCompiler.compile(Metasm::Preprocessor, :getchar, :ungetchar, :unreadtok, :readtok_nopp_str, :readtok_nopp, :readtok)
  Metasm::RubyStaticCompiler.compile(Metasm::Expression, :reduce_rec, :initialize)
  Metasm::RubyStaticCompiler.compile_singleton(Metasm::Expression, :[])
  c_src = Metasm::RubyStaticCompiler.dump
  if $VERBOSE
    File.open('compiledruby.c', 'w') { |fd| fd.puts c_src }
  end
  puts 'compiling..'
  # try to load a previously generated library, ignore its absence
  begin
    require 'compiledruby'
  rescue LoadError
  end
  # To encode to a different file, you must also rename the Init_compiledruby() function to match the lib name
  Metasm::ELF.compile_c(Metasm::Ia32.new, c_src).encode_file('compiledruby.so')
  puts 'ruby -r metasm -r compiledruby ftw'
end

end