metasm 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/BUGS +11 -0
- data/CREDITS +17 -0
- data/README +270 -0
- data/TODO +114 -0
- data/doc/code_organisation.txt +146 -0
- data/doc/const_missing.txt +16 -0
- data/doc/core_classes.txt +75 -0
- data/doc/feature_list.txt +53 -0
- data/doc/index.txt +59 -0
- data/doc/install_notes.txt +170 -0
- data/doc/style.css +3 -0
- data/doc/use_cases.txt +18 -0
- data/lib/metasm.rb +80 -0
- data/lib/metasm/arm.rb +12 -0
- data/lib/metasm/arm/debug.rb +39 -0
- data/lib/metasm/arm/decode.rb +167 -0
- data/lib/metasm/arm/encode.rb +77 -0
- data/lib/metasm/arm/main.rb +75 -0
- data/lib/metasm/arm/opcodes.rb +177 -0
- data/lib/metasm/arm/parse.rb +130 -0
- data/lib/metasm/arm/render.rb +55 -0
- data/lib/metasm/compile_c.rb +1457 -0
- data/lib/metasm/dalvik.rb +8 -0
- data/lib/metasm/dalvik/decode.rb +196 -0
- data/lib/metasm/dalvik/main.rb +60 -0
- data/lib/metasm/dalvik/opcodes.rb +366 -0
- data/lib/metasm/decode.rb +213 -0
- data/lib/metasm/decompile.rb +2659 -0
- data/lib/metasm/disassemble.rb +2068 -0
- data/lib/metasm/disassemble_api.rb +1280 -0
- data/lib/metasm/dynldr.rb +1329 -0
- data/lib/metasm/encode.rb +333 -0
- data/lib/metasm/exe_format/a_out.rb +194 -0
- data/lib/metasm/exe_format/autoexe.rb +82 -0
- data/lib/metasm/exe_format/bflt.rb +189 -0
- data/lib/metasm/exe_format/coff.rb +455 -0
- data/lib/metasm/exe_format/coff_decode.rb +901 -0
- data/lib/metasm/exe_format/coff_encode.rb +1078 -0
- data/lib/metasm/exe_format/dex.rb +457 -0
- data/lib/metasm/exe_format/dol.rb +145 -0
- data/lib/metasm/exe_format/elf.rb +923 -0
- data/lib/metasm/exe_format/elf_decode.rb +979 -0
- data/lib/metasm/exe_format/elf_encode.rb +1375 -0
- data/lib/metasm/exe_format/macho.rb +827 -0
- data/lib/metasm/exe_format/main.rb +228 -0
- data/lib/metasm/exe_format/mz.rb +164 -0
- data/lib/metasm/exe_format/nds.rb +172 -0
- data/lib/metasm/exe_format/pe.rb +437 -0
- data/lib/metasm/exe_format/serialstruct.rb +246 -0
- data/lib/metasm/exe_format/shellcode.rb +114 -0
- data/lib/metasm/exe_format/xcoff.rb +167 -0
- data/lib/metasm/gui.rb +23 -0
- data/lib/metasm/gui/cstruct.rb +373 -0
- data/lib/metasm/gui/dasm_coverage.rb +199 -0
- data/lib/metasm/gui/dasm_decomp.rb +369 -0
- data/lib/metasm/gui/dasm_funcgraph.rb +103 -0
- data/lib/metasm/gui/dasm_graph.rb +1354 -0
- data/lib/metasm/gui/dasm_hex.rb +543 -0
- data/lib/metasm/gui/dasm_listing.rb +599 -0
- data/lib/metasm/gui/dasm_main.rb +906 -0
- data/lib/metasm/gui/dasm_opcodes.rb +291 -0
- data/lib/metasm/gui/debug.rb +1228 -0
- data/lib/metasm/gui/gtk.rb +884 -0
- data/lib/metasm/gui/qt.rb +495 -0
- data/lib/metasm/gui/win32.rb +3004 -0
- data/lib/metasm/gui/x11.rb +621 -0
- data/lib/metasm/ia32.rb +14 -0
- data/lib/metasm/ia32/compile_c.rb +1523 -0
- data/lib/metasm/ia32/debug.rb +193 -0
- data/lib/metasm/ia32/decode.rb +1167 -0
- data/lib/metasm/ia32/decompile.rb +564 -0
- data/lib/metasm/ia32/encode.rb +314 -0
- data/lib/metasm/ia32/main.rb +233 -0
- data/lib/metasm/ia32/opcodes.rb +872 -0
- data/lib/metasm/ia32/parse.rb +327 -0
- data/lib/metasm/ia32/render.rb +91 -0
- data/lib/metasm/main.rb +1193 -0
- data/lib/metasm/mips.rb +11 -0
- data/lib/metasm/mips/compile_c.rb +7 -0
- data/lib/metasm/mips/decode.rb +253 -0
- data/lib/metasm/mips/encode.rb +51 -0
- data/lib/metasm/mips/main.rb +72 -0
- data/lib/metasm/mips/opcodes.rb +443 -0
- data/lib/metasm/mips/parse.rb +51 -0
- data/lib/metasm/mips/render.rb +43 -0
- data/lib/metasm/os/gnu_exports.rb +270 -0
- data/lib/metasm/os/linux.rb +1112 -0
- data/lib/metasm/os/main.rb +1686 -0
- data/lib/metasm/os/remote.rb +527 -0
- data/lib/metasm/os/windows.rb +2027 -0
- data/lib/metasm/os/windows_exports.rb +745 -0
- data/lib/metasm/parse.rb +876 -0
- data/lib/metasm/parse_c.rb +3938 -0
- data/lib/metasm/pic16c/decode.rb +42 -0
- data/lib/metasm/pic16c/main.rb +17 -0
- data/lib/metasm/pic16c/opcodes.rb +68 -0
- data/lib/metasm/ppc.rb +11 -0
- data/lib/metasm/ppc/decode.rb +264 -0
- data/lib/metasm/ppc/decompile.rb +251 -0
- data/lib/metasm/ppc/encode.rb +51 -0
- data/lib/metasm/ppc/main.rb +129 -0
- data/lib/metasm/ppc/opcodes.rb +410 -0
- data/lib/metasm/ppc/parse.rb +52 -0
- data/lib/metasm/preprocessor.rb +1277 -0
- data/lib/metasm/render.rb +130 -0
- data/lib/metasm/sh4.rb +8 -0
- data/lib/metasm/sh4/decode.rb +336 -0
- data/lib/metasm/sh4/main.rb +292 -0
- data/lib/metasm/sh4/opcodes.rb +381 -0
- data/lib/metasm/x86_64.rb +12 -0
- data/lib/metasm/x86_64/compile_c.rb +1025 -0
- data/lib/metasm/x86_64/debug.rb +59 -0
- data/lib/metasm/x86_64/decode.rb +268 -0
- data/lib/metasm/x86_64/encode.rb +264 -0
- data/lib/metasm/x86_64/main.rb +135 -0
- data/lib/metasm/x86_64/opcodes.rb +118 -0
- data/lib/metasm/x86_64/parse.rb +68 -0
- data/misc/bottleneck.rb +61 -0
- data/misc/cheader-findpppath.rb +58 -0
- data/misc/hexdiff.rb +74 -0
- data/misc/hexdump.rb +55 -0
- data/misc/metasm-all.rb +13 -0
- data/misc/objdiff.rb +47 -0
- data/misc/objscan.rb +40 -0
- data/misc/pdfparse.rb +661 -0
- data/misc/ppc_pdf2oplist.rb +192 -0
- data/misc/tcp_proxy_hex.rb +84 -0
- data/misc/txt2html.rb +440 -0
- data/samples/a.out.rb +31 -0
- data/samples/asmsyntax.rb +77 -0
- data/samples/bindiff.rb +555 -0
- data/samples/compilation-steps.rb +49 -0
- data/samples/cparser_makestackoffset.rb +55 -0
- data/samples/dasm-backtrack.rb +38 -0
- data/samples/dasmnavig.rb +318 -0
- data/samples/dbg-apihook.rb +228 -0
- data/samples/dbghelp.rb +143 -0
- data/samples/disassemble-gui.rb +102 -0
- data/samples/disassemble.rb +133 -0
- data/samples/dump_upx.rb +95 -0
- data/samples/dynamic_ruby.rb +1929 -0
- data/samples/elf_list_needed.rb +46 -0
- data/samples/elf_listexports.rb +33 -0
- data/samples/elfencode.rb +25 -0
- data/samples/exeencode.rb +128 -0
- data/samples/factorize-headers-elfimports.rb +77 -0
- data/samples/factorize-headers-peimports.rb +109 -0
- data/samples/factorize-headers.rb +43 -0
- data/samples/gdbclient.rb +583 -0
- data/samples/generate_libsigs.rb +102 -0
- data/samples/hotfix_gtk_dbg.rb +59 -0
- data/samples/install_win_env.rb +78 -0
- data/samples/lindebug.rb +924 -0
- data/samples/linux_injectsyscall.rb +95 -0
- data/samples/machoencode.rb +31 -0
- data/samples/metasm-shell.rb +91 -0
- data/samples/pe-hook.rb +69 -0
- data/samples/pe-ia32-cpuid.rb +203 -0
- data/samples/pe-mips.rb +35 -0
- data/samples/pe-shutdown.rb +78 -0
- data/samples/pe-testrelocs.rb +51 -0
- data/samples/pe-testrsrc.rb +24 -0
- data/samples/pe_listexports.rb +31 -0
- data/samples/peencode.rb +19 -0
- data/samples/peldr.rb +494 -0
- data/samples/preprocess-flatten.rb +19 -0
- data/samples/r0trace.rb +308 -0
- data/samples/rubstop.rb +399 -0
- data/samples/scan_pt_gnu_stack.rb +54 -0
- data/samples/scanpeexports.rb +62 -0
- data/samples/shellcode-c.rb +40 -0
- data/samples/shellcode-dynlink.rb +146 -0
- data/samples/source.asm +34 -0
- data/samples/struct_offset.rb +47 -0
- data/samples/testpe.rb +32 -0
- data/samples/testraw.rb +45 -0
- data/samples/win32genloader.rb +132 -0
- data/samples/win32hooker-advanced.rb +169 -0
- data/samples/win32hooker.rb +96 -0
- data/samples/win32livedasm.rb +33 -0
- data/samples/win32remotescan.rb +133 -0
- data/samples/wintrace.rb +92 -0
- data/tests/all.rb +8 -0
- data/tests/dasm.rb +39 -0
- data/tests/dynldr.rb +35 -0
- data/tests/encodeddata.rb +132 -0
- data/tests/ia32.rb +82 -0
- data/tests/mips.rb +116 -0
- data/tests/parse_c.rb +239 -0
- data/tests/preprocessor.rb +269 -0
- data/tests/x86_64.rb +62 -0
- metadata +255 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
3
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
4
|
+
#
|
|
5
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
#
|
|
9
|
+
# this script disassembles an executable (elf/pe) and dumps the output
|
|
10
|
+
# ruby -h for help
|
|
11
|
+
#
|
|
12
|
+
|
|
13
|
+
require 'metasm'
|
|
14
|
+
include Metasm
|
|
15
|
+
require 'optparse'
|
|
16
|
+
|
|
17
|
+
# parse arguments
|
|
18
|
+
# Command-line options, filled in by the OptionParser handlers below.
# :sc_cpu is the only key with a default value; every other key stays unset
# (nil) until the matching switch is seen.
opts = { :sc_cpu => 'Ia32' }

# appends a value to one of the list-valued options, creating the list on first use
append_opt = lambda { |key, value| (opts[key] ||= []) << value }

optparser = OptionParser.new do |opt|
  opt.banner = 'Usage: disassemble.rb [options] <executable> [<entrypoints>]'
  opt.on('--no-data', 'do not display data bytes') { opts[:nodata] = true }
  opt.on('--no-data-trace', 'do not backtrace memory read/write accesses') { opts[:nodatatrace] = true }
  opt.on('--debug-backtrace', 'enable backtrace-related debug messages (very verbose)') { opts[:debugbacktrace] = true }
  opt.on('-c <header>', '--c-header <header>', 'read C function prototypes (for external library functions)') { |header| opts[:cheader] = header }
  opt.on('-o <outfile>', '--output <outfile>', 'save the assembly listing in the specified file (defaults to stdout)') { |outfile| opts[:outfile] = outfile }
  opt.on('--cpu <cpu>', 'the CPU class to use for a shellcode (Ia32, X64, ...)') { |cpu| opts[:sc_cpu] = cpu }
  opt.on('--exe <exe_fmt>', 'the executable file format to use (PE, ELF, ...)') { |fmt| opts[:exe_fmt] = fmt }
  opt.on('--rebase <addr>', 'rebase the loaded file to <addr>') { |addr| opts[:rebase] = Integer(addr) }
  opt.on('-s <savefile>', 'save the disassembler state after disasm') { |savefile| opts[:savefile] = savefile }
  # comma-separated list; repeated switches are merged without duplicates
  opt.on('-S <addrlist>', '--stop <addrlist>', '--stopaddr <addrlist>', 'do not disassemble past these addresses') { |list|
    opts[:stopaddr] = (opts[:stopaddr] || []) | list.split(',')
  }
  opt.on('-P <plugin>', '--plugin <plugin>', 'load a metasm disassembler plugin') { |plugin| append_opt[:plugin, plugin] }
  opt.on('--post-plugin <plugin>', 'load a metasm disassembler plugin after disassembly is finished') { |plugin| append_opt[:post_plugin, plugin] }
  opt.on('-e <code>', '--eval <code>', 'eval a ruby code') { |code| append_opt[:hookstr, code] }
  opt.on('--benchmark') { opts[:benchmark] = true }
  opt.on('--decompile') { opts[:decompile] = true }
  opt.on('--map <mapfile>') { |mapfile| opts[:map] = mapfile }
  opt.on('-a', '--autoload', 'loads all relevant files with same filename (.h, .map..)') { opts[:autoload] = true }
  opt.on('--fast', 'use disassemble_fast (no backtracking)') { opts[:fast] = true }
  opt.on('-v', '--verbose') { $VERBOSE = true }
  opt.on('-d', '--debug') { $DEBUG = $VERBOSE = true }
end

# consumes the switches from ARGV, leaving only the positional arguments
optparser.parse!(ARGV)
|
|
42
|
+
|
|
43
|
+
# first positional argument: the file to disassemble, or 'live:<process>'
exename = ARGV.shift
# Nothing can be loaded without a target: show the usage line instead of
# failing later on with an unrelated error inside decode_file(nil).
abort 'Usage: disassemble.rb [options] <executable> [<entrypoints>]' if not exename

t0 = Time.now if opts[:benchmark]

# load the file
# NOTE: the top-level local names used here (exe, dasm, opts...) are visible to
# the ruby code given with -e, which is eval'ed in this scope; keep them stable.
if exename =~ /^live:(.*)/
  # live target: decode the memory of the matching process as a shellcode for --cpu
  raise 'no such live target' if not target = OS.current.find_process($1)
  p target if $VERBOSE
  exe = Shellcode.decode(target.memory, Metasm.const_get(opts[:sc_cpu]).new)
else
  # explicit --exe format, or autodetection with a shellcode fallback using --cpu
  exefmt = opts[:exe_fmt] ? Metasm.const_get(opts[:exe_fmt]) : AutoExe.orshellcode { Metasm.const_get(opts[:sc_cpu]).new }
  exefmt = exefmt.withcpu(Metasm.const_get(opts[:sc_cpu]).new) if opts[:exe_fmt] == 'Shellcode' and opts[:sc_cpu]
  exe = exefmt.decode_file(exename)
  exe.disassembler.rebase(opts[:rebase]) if opts[:rebase]
  if opts[:autoload]
    # strip a 1 to 3 character extension, then look for companion files;
    # options given explicitly on the command line take precedence
    basename = exename.sub(/\.\w\w?\w?$/, '')
    opts[:map] ||= basename + '.map' if File.exist?(basename + '.map')
    opts[:cheader] ||= basename + '.h' if File.exist?(basename + '.h')
    (opts[:plugin] ||= []) << (basename + '.rb') if File.exist?(basename + '.rb')
  end
end
# set options
dasm = exe.init_disassembler
|
|
66
|
+
# Converts an address given on the command line:
#  - starts with a digit and contains an 'h' => parsed as hexadecimal
#  - starts with a digit                     => parsed by Integer() (decimal, 0x.., ...)
#  - anything else                           => handed to dasm.normalize
makeint = lambda do |addr|
  if /^[0-9].*h/ === addr
    addr.to_i(16)
  elsif /^[0-9]/ === addr
    Integer(addr)
  else
    dasm.normalize(addr)
  end
end
|
|
73
|
+
# apply the command-line options to the disassembler
dasm.load_map(opts[:map]) if opts[:map]
dasm.parse_c_file(opts[:cheader]) if opts[:cheader]
dasm.backtrace_maxblocks_data = -1 if opts[:nodatatrace]
dasm.debug_backtrace = true if opts[:debugbacktrace]
# stop addresses are flagged in dasm.decoded before the disassembly starts
(opts[:stopaddr] || []).each { |stop| dasm.decoded[makeint[stop]] = true }
(opts[:plugin] || []).each { |plugin| dasm.load_plugin(plugin) }
# -e snippets are evaluated here, with the script's local variables in scope
(opts[:hookstr] || []).each { |f| eval f }

t1 = Time.now if opts[:benchmark]
# do the work
begin
  method = opts[:fast] ? :disassemble_fast_deep : :disassemble
  # remaining arguments are entrypoints; with none, the method is called without arguments
  entrypoints = ARGV.map { |addr| makeint[addr] }
  exe.send(method, *entrypoints)
rescue Interrupt => interrupted
  # ^C stops the disassembly; the script goes on with what was gathered so far
  puts interrupted, interrupted.backtrace
end
t2 = Time.now if opts[:benchmark]
|
|
94
|
+
|
|
95
|
+
if opts[:decompile]
  # the state is saved a first time before decompiling, and again below
  dasm.save_file(opts[:savefile]) if opts[:savefile]
  dasm.decompile(*dasm.entrypoints)
  tdc = Time.now if opts[:benchmark]
end

(opts[:post_plugin] || []).each { |plugin| dasm.load_plugin(plugin) }

dasm.save_file(opts[:savefile]) if opts[:savefile]

# output
if opts[:outfile]
  File.open(opts[:outfile], 'w') do |fd|
    if opts[:decompile]
      # decompiled C first, then the listing wrapped in #if 0 / #endif
      fd.puts dasm.c_parser
      fd.puts "#if 0"
    end
    dasm.dump(!opts[:nodata]) { |line| fd.puts line }
    fd.puts "#endif" if opts[:decompile]
  end
elsif opts[:savefile]
  # a savefile without an output file: nothing is printed
elsif opts[:decompile]
  puts dasm.c_parser
else
  dasm.dump(!opts[:nodata])
end

t3 = Time.now if opts[:benchmark]
|
|
122
|
+
|
|
123
|
+
# Formats a duration given in seconds:
#  above 5400s (1h30) => "<h>h<m>mn"
#  above 90s          => "<m>mn<s>s"
#  otherwise          => seconds with two decimals, e.g. "1.50s"
todate = lambda do |f|
  if f > 5400
    hours, rest = f.to_i.divmod(3600)
    "#{hours}h#{rest / 60}mn"
  elsif f > 90
    minutes, seconds = f.to_i.divmod(60)
    "#{minutes}mn#{seconds}s"
  else
    format('%.02f', f) + 's'
  end
end
|
|
132
|
+
|
|
133
|
+
# --benchmark: print the time spent in each phase (tdc is only set with --decompile)
if opts[:benchmark]
  report = ["durations", " load #{todate[t1-t0]}", " dasm #{todate[t2-t1]}"]
  report << " decomp #{todate[tdc-t2]}" if tdc
  report << " output #{todate[t3-(tdc||t2)]}"
  report << " total #{todate[t3-t0]}"
  puts report.join("\n")
end
|
data/samples/dump_upx.rb
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
# Original script and idea by Alexandre GAZET
|
|
4
|
+
#
|
|
5
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
#
|
|
9
|
+
# this script will load an upx-packed windows executable, find its
|
|
10
|
+
# original entrypoint by disassembling the UPX stub, set breakpoint on it,
|
|
11
|
+
# run the program, and dump the loaded image to an executable PE.
|
|
12
|
+
#
|
|
13
|
+
# usage: dump_upx.rb <packed.exe> [<dumped.exe>] [<rva iat>]
|
|
14
|
+
#
|
|
15
|
+
|
|
16
|
+
require 'metasm'
|
|
17
|
+
include Metasm
|
|
18
|
+
|
|
19
|
+
# Dumps an UPX-packed windows executable: the UPX stub is disassembled to
# locate the original entrypoint (oep), the program is run under a debugger
# until it reaches the oep, and the loaded image is written back as a PE file.
class UPXUnpacker
  # loads the file
  # find the oep by disassembling
  # run it until the oep
  # dump the memory image
  #
  # file::     path of the packed executable
  # dumpfile:: path of the PE to write, 'upx-dumped.exe' when nil
  # iat_rva::  optional address of the import table, handed to LoadedPE.memdump;
  #            a value above the image base is taken as a VA and turned into an RVA
  #
  # raises a RuntimeError when no oep can be found in the stub
  def initialize(file, dumpfile, iat_rva=nil)
    @dumpfile = dumpfile || 'upx-dumped.exe'
    @iat = iat_rva

    puts 'disassembling UPX loader...'
    pe = PE.decode_file(file)
    @oep = find_oep(pe)
    raise 'cant find oep...' if not @oep
    puts "oep found at #{Expression[@oep]}"
    @baseaddr = pe.optheader.image_base
    # the iat is optional: only convert it when one was actually given
    @iat -= @baseaddr if @iat and @iat > @baseaddr	# va => rva

    @dbg = OS.current.create_process(file).debugger
    puts 'running...'
    debugloop
  end

  # disassemble the upx stub to find a cross-section jump (to the real entrypoint)
  # returns the normalized target of that jump, or nil if there is none
  def find_oep(pe)
    dasm = pe.disassemble_fast 'entrypoint'

    return if not jmp = dasm.decoded.find { |addr, di|
      # check only once per basic block
      next if not di.block_head?
      b = di.block
      # our target has only one follower
      next if b.to_subfuncret.to_a.length != 0 or b.to_normal.to_a.length != 1
      to = b.to_normal.first
      # ignore jump to unmapped address
      next if not s = dasm.get_section_at(to)
      # ignore jump to same section
      next if dasm.get_section_at(di.address) == s

      # gotcha !
      true
    }

    # now jmp is a couple [addr, di], we extract and normalize the oep from there
    dasm.normalize(jmp[1].block.to_normal.first)
  end

  # runs the target until the breakpoint set on the oep has been handled
  def debugloop
    # set up a oneshot breakpoint on oep
    @dbg.hwbp(@oep, :x, 1, true) { breakpoint_callback }
    @dbg.run_forever
    puts 'done'
  end

  # called when the oep is reached: writes the dump, then kills the target
  # (the ensure clause kills it even if the dump fails)
  def breakpoint_callback
    puts 'breakpoint hit !'

    # dump the process
    # create a genuine PE object from the memory image
    dump = LoadedPE.memdump @dbg.memory, @baseaddr, @oep, @iat

    # the UPX loader unpacks everything in sections marked read-only in the PE header, make them writeable
    dump.sections.each { |s| s.characteristics |= ['MEM_WRITE'] }

    # write the PE file to disk
    dump.encode_file @dumpfile

    puts 'dump complete'
  ensure
    # kill the process
    @dbg.kill
  end
end
|
|
91
|
+
|
|
92
|
+
if __FILE__ == $0
  # args: packed [unpacked] [iat rva]
  # the packed file is mandatory: print the usage instead of failing in the PE decoder
  abort 'usage: dump_upx.rb <packed.exe> [<dumped.exe>] [<rva iat>]' if ARGV.empty?
  # a missing or non-numeric iat argument is deliberately turned into nil (no iat)
  UPXUnpacker.new(ARGV.shift, ARGV.shift, (Integer(ARGV.shift) rescue nil))
end
|
|
@@ -0,0 +1,1929 @@
|
|
|
1
|
+
# This file is part of Metasm, the Ruby assembly manipulation suite
|
|
2
|
+
# Copyright (C) 2006-2009 Yoann GUILLOT
|
|
3
|
+
#
|
|
4
|
+
# Licence is LGPL, see LICENCE in the top-level directory
|
|
5
|
+
|
|
6
|
+
# This sample hacks in the ruby interpreter to allow dynamic loading of shellcodes as object methods
|
|
7
|
+
# Also it allows raw modifications to the ruby interpreter memory, for all kind of purposes
|
|
8
|
+
# Includes methods to dump the ruby parser AST from the interpreter memory
|
|
9
|
+
# elf/linux/x86 only
|
|
10
|
+
|
|
11
|
+
require 'metasm'
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
module Metasm
|
|
15
|
+
# Low-level access to the running ruby interpreter, built on DynLdr:
# installs raw machine code as method implementations and reads the parser
# AST nodes of existing methods straight from the interpreter memory.
class RubyHack < DynLdr
  # basic C defs for ruby AST - ruby1.8 only !
  RUBY_INTERN_NODE = <<EOS
struct node {
long flags;
char *file;
long a1;
long a2;
long a3;
};
#define FL_USHIFT 11
#define nd_type(n) ((((struct node*)n)->flags >> FL_USHIFT) & 0xff)
EOS
  # node type names, indexed by the numeric type stored in the node flags
  NODETYPE = [
    :method, :fbody, :cfunc, :scope, :block,
    :if, :case, :when, :opt_n, :while,
    :until, :iter, :for, :break, :next,
    :redo, :retry, :begin, :rescue, :resbody,
    :ensure, :and, :or, :not, :masgn,
    :lasgn, :dasgn, :dasgn_curr, :gasgn, :iasgn,
    :cdecl, :cvasgn, :cvdecl, :op_asgn1, :op_asgn2,
    :op_asgn_and, :op_asgn_or, :call, :fcall, :vcall,
    :super, :zsuper, :array, :zarray, :hash,
    :return, :yield, :lvar, :dvar, :gvar, # 50
    :ivar, :const, :cvar, :nth_ref, :back_ref,
    :match, :match2, :match3, :lit, :str,
    :dstr, :xstr, :dxstr, :evstr, :dregx,
    :dregx_once, :args, :argscat, :argspush, :splat,
    :to_ary, :svalue, :block_arg, :block_pass, :defn,
    :defs, :alias, :valias, :undef, :class,
    :module, :sclass, :colon2, :colon3, :cref,
    :dot2, :dot3, :flip2, :flip3, :attrset,
    :self, :nil, :true, :false, :defined,
    :newline, :postexe, :alloca, :dmethod, :bmethod, # 100
    :memo, :ifunc, :dsym, :attrasgn, :last
  ]

  new_api_c 'void rb_define_method(uintptr_t, char *, uintptr_t (*)(), int)'
  new_api_c 'void *rb_method_node(uintptr_t, unsigned id)'

  class << self
    # makes the memory holding the string +code+ rwx, then registers it
    # through rb_define_method as method +meth+ of +klass+ taking +nparams+
    def set_class_method_raw(klass, meth, code, nparams)
      code_addr = str_ptr(code)
      memory_perm(code_addr, code.length, 'rwx')
      klass_value = rb_obj_to_value(klass)
      rb_define_method(klass_value, meth, code, nparams)
    end

    # returns what rb_method_node gives for method +meth+ of +klass+
    # raises if klass is not a Module
    def get_method_node_ptr(klass, meth)
      unless klass.kind_of?(Module)
        raise "#{klass.inspect} is not a class"
      end
      meth_id = meth.to_sym.to_i
      rb_method_node(rb_obj_to_value(klass), meth_id)
    end

    # sets up rawopcodes as the method implementation for class klass
    # rawopcodes must implement the expected ABI or things will break horribly
    # this method is VERY UNSAFE, and breaks everything put in place by the ruby interpreter
    # use with EXTREME CAUTION
    # nargs  arglist
    # -2     self, arg_ary
    # -1     argc, VALUE*argv, self
    # >=0    self, arg0, arg1..
    #
    # nargs defaults to the arity of the existing method; an EncodedData is
    # relocated in place (external symbols resolved with sym_addr) before use
    def set_method_binary(klass, methodname, raw, nargs=nil)
      nargs ||= klass.instance_method(methodname).arity
      if raw.kind_of? EncodedData
        baseaddr = str_ptr(raw.data)
        bd = raw.binding(baseaddr)
        raw.reloc_externals.uniq.each do |ext|
          addr = sym_addr(0, ext)
          bd[ext] = addr
          raise "unknown symbol #{ext}" if not addr
        end
        raw.fixup(bd)
        raw = raw.data
      end
      # keep a reference to the code for as long as the method may be called
      @@prevent_gc ||= {}
      @@prevent_gc[[klass, methodname]] = raw
      set_class_method_raw(klass, methodname.to_s, raw, nargs)
    end

    # same as set_method_binary but with an object and not a class
    def set_singleton_method_binary(obj, *a)
      singleton = class << obj ; self ; end
      set_method_binary(singleton, *a)
    end

    # returns the AST (nested arrays, see read_node) of method +meth+ of +klass+
    def read_method_ast(klass, meth)
      node_ptr = get_method_node_ptr(klass, meth)
      read_node node_ptr
    end

    # same as read_method_ast, for a method defined on the singleton class of +klass+
    def read_singleton_method_ast(klass, meth)
      singleton = class << klass ; self ; end
      read_method_ast(singleton, meth)
    end

    # Decodes the node found at address +ptr+ in the interpreter memory into
    # nested arrays whose first element is the node type (see NODETYPE).
    # +cur+ is the array being filled while walking a chain of :block / :array /
    # :hash / :cref nodes. Returns nil for the pointer values 0 and 4.
    # The three node values are read at offsets 8, 12 and 16, which matches a1..a3
    # of struct node with 4-byte fields (NOTE(review): 32-bit layout - confirm).
    def read_node(ptr, cur=nil)
      return if ptr == 0 or ptr == 4

      flags = memory_read_int(ptr)
      type = NODETYPE[(flags >> 11) & 0xff]
      v1 = memory_read_int(ptr+8)
      v2 = memory_read_int(ptr+12)
      v3 = memory_read_int(ptr+16)

      case type
      # --- chained nodes, flattened in a single array
      when :block, :array, :hash
        cur = nil if cur and cur[0] != type
        cur ||= [type]
        cur << read_node(v1)
        n = read_node(v3, cur)
        raise "block->next = #{n.inspect}" if n and n[0] != type
        cur
      when :cref
        cur = nil if cur and cur[0] != type
        cur ||= [type]
        cur << rb_value_to_obj(v1) if v1 != 0 and v1 != 4
        n = read_node(v3, cur)
        raise "block->next = #{n.inspect}" if n and n[0] != type
        cur

      # --- nodes without any child
      when :nil, :true, :false, :self, :redo, :retry
        [type]
      when :zarray
        [:array]

      # --- nodes whose values are all subnodes
      when :newline
        read_node(v3)	# debug/trace usage only
      when :return, :break, :next, :defined, :to_ary, :splat
        [type, read_node(v1)]
      when :not, :evstr
        [type, read_node(v2)]
      when :and, :or, :dot2, :dot3
        # :and/:or are shortcircuit
        [type, read_node(v1), read_node(v2)]
      when :if, :masgn, :when, :block_pass, :iter
        # :masgn  multiple assignment: a, b = 42 / lambda { |x, y| }.call(1, 2)
        #         v3 = remainder storage (a, b, *c = ary => v3=c)
        # :when   [:when, [:array, [test]], then, else]
        # :block_pass  [args, block, receiver]: foo(bar, &baz) => [:bpass, [:array, bar], [:lvar, baz], [:call, 'foo', bar]] (args in v1&v3!)
        # :iter   save a block for the following funcall
        #         v1 = assignments with nil, not realized, just to store the arg list (multi args -> :masgn)
        #         v2 = the body statements (multi -> :block)
        #         v3 = the stuff which is passed the block, probably a :call
        [type, read_node(v1), read_node(v2), read_node(v3)]

      # --- nodes mixing subnodes, ids and raw values
      when :while, :until, :argscat
        [type, read_node(v1), read_node(v2), v3]
      when :ensure
        [type, read_node(v1), v2, read_node(v3)]
      when :method
        [type, v1, read_node(v2), v3]
      when :alias
        [type, v1, v2, v3]	# ?
      when :call, :fcall, :vcall
        [type, read_node(v1), v2.id2name, read_node(v3)]
      when :attrasgn
        [type, ((v1 == 1) ? :self : read_node(v1)), v2.id2name, read_node(v3)]
      when :colon2
        [type, read_node(v1), v2.id2name]
      when :colon3	# ::Stuff
        [type, v2.id2name]
      when :block_arg
        [type, v1.id2name, v2, v3]

      # --- variables
      when :lasgn
        [type, v3, read_node(v2)]
      when :iasgn, :dasgn, :dasgn_curr, :gasgn, :cvasgn
        [type, v1.id2name, read_node(v2)]
      when :lvar
        [type, v3]
      when :ivar, :dvar, :gvar, :cvar, :const, :attrset
        [type, v1.id2name]

      # --- literals
      when :str
        # cannot use _id2ref here, probably the parser does not use standard alloced objects
        data_ptr = memory_read_int(v1+12)
        data_len = memory_read_int(v1+16)
        [type, memory_read(data_ptr, data_len)]
      when :lit
        [type, rb_value_to_obj(v1)]
      when :dstr
        ret = [type, [:str, rb_value_to_obj(v1)]]
        args = read_node(v3)
        if args
          raise "#{ret.inspect} with args != array: #{args.inspect}" if args[0] != :array
          ret.concat args[1..-1]
        end
        ret

      # --- function-level nodes
      when :cfunc
        # the arity is read as an unsigned integer: map the 32 and 64bit encodings of -1 and -2 back
        negative_arity = {0xffffffff => -1, 0xfffffffe => -2, 0xffffffffffffffff => -1, 0xfffffffffffffffe => -2}
        arity = negative_arity[v2] || v2
        [type, {:fptr => v1,	# c func pointer
          :arity => arity}]
      when :scope
        # nr of local vars (+2 for $_/$~)
        localnr = (v1 != 0 && v1 != 4) ? memory_read_int(v1) : 0
        # node, starting point for const/@@var resolution
        cref = read_node(v2)[1..-1]
        [type, {:localnr => localnr, :cref => cref}, read_node(v3)]
      when :args	# specialcased by rb_call0, invalid in rb_eval
        # v3 = nr of required args, copied directly to local_vars
        # v1 = :block to execute for each missing arg / with N optargs specified, skip N 1st statements
        # v2 = catchall arg in def foo(rq1, rq2, *rest)
        [type, v3, read_node(v1), read_node(v2)]
      when :case
        # [:case, var_test, [:when, cnd, action, [:when, cnd2, action2, else]]]
        # => [:case, var_test, [:when, cnd, action], [:when, cnd2, action], else]
        cs = [type, read_node(v1), read_node(v2)]
        while cs[-1][0] == :when and cs[-1][3]
          cs << cs[-1].pop
        end
        cs

      else
        puts "unhandled #{type.inspect}"
        [type, v1, v2, v3]
      end
    end
  end	# class << self
end
|
|
231
|
+
|
|
232
|
+
# a ruby2c C generator for use in the current ruby interpreter
|
|
233
|
+
# generates C suitable for shellcode compilation & insertion in the current interpreter
|
|
234
|
+
# has hardcoded addresses etc
|
|
235
|
+
class RubyLiveCompiler
|
|
236
|
+
attr_accessor :cp
|
|
237
|
+
|
|
238
|
+
RUBY_H = <<EOS
|
|
239
|
+
#{DynLdr::RUBY_H}
|
|
240
|
+
|
|
241
|
+
VALUE rb_ivar_get(VALUE, unsigned);
|
|
242
|
+
VALUE rb_ivar_set(VALUE, unsigned, VALUE);
|
|
243
|
+
VALUE rb_ivar_defined(VALUE, unsigned);
|
|
244
|
+
VALUE rb_cvar_get(VALUE, unsigned);
|
|
245
|
+
VALUE rb_cvar_set(VALUE, unsigned, VALUE, int);
|
|
246
|
+
VALUE rb_gv_get(const char*);
|
|
247
|
+
VALUE rb_gv_set(const char*, VALUE);
|
|
248
|
+
|
|
249
|
+
VALUE rb_ary_new(void);
|
|
250
|
+
VALUE rb_ary_new4(long, VALUE*);
|
|
251
|
+
VALUE rb_ary_push(VALUE, VALUE);
|
|
252
|
+
VALUE rb_ary_pop(VALUE);
|
|
253
|
+
VALUE rb_ary_shift(VALUE);
|
|
254
|
+
VALUE rb_hash_new(void);
|
|
255
|
+
VALUE rb_hash_aset(VALUE, VALUE, VALUE);
|
|
256
|
+
VALUE rb_str_new(const char*, long);
|
|
257
|
+
VALUE rb_str_new2(const char*);
|
|
258
|
+
VALUE rb_str_cat2(VALUE, const char*);
|
|
259
|
+
VALUE rb_str_concat(VALUE, VALUE);
|
|
260
|
+
VALUE rb_str_append(VALUE, VALUE);
|
|
261
|
+
VALUE rb_obj_as_string(VALUE);
|
|
262
|
+
VALUE rb_range_new(VALUE, VALUE, int exclude_end);
|
|
263
|
+
VALUE rb_Array(VALUE); // :splat
|
|
264
|
+
VALUE rb_ary_to_ary(VALUE);
|
|
265
|
+
VALUE rb_hash_aref(VALUE, VALUE);
|
|
266
|
+
VALUE rb_funcall3(VALUE, unsigned, int, VALUE*);
|
|
267
|
+
VALUE rb_singleton_class(VALUE);
|
|
268
|
+
VALUE rb_block_proc(void);
|
|
269
|
+
void rb_define_method(VALUE, char *, VALUE (*)(), int);
|
|
270
|
+
void *rb_method_node(VALUE, unsigned);
|
|
271
|
+
EOS
|
|
272
|
+
|
|
273
|
+
# error class of the compiler (its raise sites are not in this part of the file)
class Fail < RuntimeError; end
|
|
275
|
+
|
|
276
|
+
# Compiles each method of +klass+ named in +methlist+ and installs the result
# in place of the ruby implementation: the method AST is read with RubyHack,
# converted to a C function by a compiler instance shared between calls, built
# with RubyHack.compile_c and set with RubyHack.set_method_binary.
# Methods for which the compiler returns nothing are left untouched.
# Returns self.
def self.compile(klass, *methlist)
  @rcp ||= new
  methlist.each do |meth|
    cfunc_name = @rcp.compile(RubyHack.read_method_ast(klass, meth), klass, meth)
    if cfunc_name
      c_src = @rcp.cp.dump_definition(cfunc_name)
      RubyHack.set_method_binary(klass, meth, RubyHack.compile_c(c_src).encoded)
    end
  end
  self
end
|
|
287
|
+
|
|
288
|
+
# returns the C source held by the parser: only the definition of +m+ when
# given, the whole parser content otherwise
def dump(m=nil)
  return @cp.to_s unless m
  @cp.dump_definition(m)
end
|
|
291
|
+
|
|
292
|
+
attr_accessor :optim_hint
|
|
293
|
+
# cp:: the C parser to fill, a new one for the host cpu when not given
# RUBY_H is parsed into the parser right away.
def initialize(cp=nil)
  @iter_break = nil
  @optim_hint = {}
  @cp = cp
  @cp ||= DynLdr.host_cpu.new_cparser
  @cp.parse RUBY_H
end
|
|
299
|
+
|
|
300
|
+
# convert a ruby AST to a new C function
# returns the new function name, or nil when there is no ast
#
# ast::       the method AST, as built by RubyHack.read_node
# klass/meth:: where the method lives; used to name the C function and by the arg compilation
# singleton:: true when meth is a singleton method of klass
#
# raises on an ast that is neither :ivar, :attrset nor :scope
def compile(ast, klass, meth, singleton=false)
  return if not ast

  # TODO handle arbitrary block/yield constructs
  # TODO analyse to find/optimize numeric locals that never need a ruby VALUE (ie native int vs INT2FIX)
  # TODO detect block/closure exported out of the func & abort compilation

  @klass = klass
  @meth = meth
  @meth_singleton = singleton

  separator = singleton ? '.' : '#'
  mname = escape_varname("m_#{@klass}#{separator}#{@meth}".gsub('::', '_'))
  # declare an empty function, then patch its prototype and fill its body
  @cp.parse "static void #{mname}(VALUE self) { }"
  @cur_cfunc = @cp.toplevel.symbol[mname]
  @cur_cfunc.type.type = value	# return type = VALUE, w/o 'missing return statement' warning

  @scope = @cur_cfunc.initializer

  ret = case ast[0]
    when :ivar	# attr_reader
      fcall('rb_ivar_get', rb_self, rb_intern(ast[1]))
    when :attrset	# attr_writer
      compile_args(@cur_cfunc, [nil, 1])
      fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), local(2))
    when :scope	# standard ruby function
      @cref = ast[1][:cref]
      body = ast[2]
      if body and body[0] == :block and body[1] and body[1][0] == :args
        compile_args(@cur_cfunc, body[1])
      end
      # the value of an instance 'initialize' is not wanted: nil is returned instead
      want_value = !(meth.to_s == 'initialize' && !singleton)
      body_value = ast_to_c(body, @scope, want_value)
      want_value ? body_value : rb_nil
    #when :cfunc	# native ruby extension
    else
      raise "unhandled function ast #{ast.inspect}"
    end

  @scope.statements << C::Return.new(ret)

  mname
end
|
|
345
|
+
|
|
346
|
+
# return the arity of method 'name' (default: the method being compiled)
# looked up as a singleton method of @klass when compiling a singleton method,
# as an instance method of @klass otherwise
def method_arity(name=@meth)
  target = @meth_singleton ? @klass.method(name) : @klass.instance_method(name)
  target.arity
end
|
|
350
|
+
|
|
351
|
+
# find the scope where constname is defined from @cref
# returns the first element of @cref listing constname among its constants, nil if there is none
# names are compared as strings, so constname may be a Symbol or a String
def resolve_const_owner(constname)
  wanted = constname.to_s
  @cref.find { |mod| mod.constants.any? { |cst| cst.to_s == wanted } }
end
|
|
355
|
+
|
|
356
|
+
# checks if ast maps to a constant that can be resolved at compile time, returns it if it does
# returns nil for an unhandled ast or an unresolvable name
#  [:const, name]         => constant 'name' taken from the @cref scope defining it
#  [:colon2, scope, name] => constant 'name' of the resolved 'scope' constant
#  [:colon3, name]        => toplevel constant 'name'
def check_const(ast)
  case ast[0]
  when :const
    # resolve_const_owner only gives the defining scope, fetch the constant itself
    owner = resolve_const_owner(ast[1])
    owner.const_get(ast[1]) if owner
  when :colon2
    if cst = check_const(ast[1])
      cst.const_get(ast[2])
    end
  when :colon3
    # ast_to_c reads the name of a :colon3 from ast[1]: accept both layouts
    ::Object.const_get(ast[2] || ast[1])
  end
end
|
|
369
|
+
|
|
370
|
+
# declare the arguments of the method being compiled on the C function func
# args is the :args node of the ruby AST, args[1] is used as the count of mandatory arguments
# dispatches on the arity of the ruby method (see method_arity)
def compile_args(func, args)
  arity = method_arity
  return compile_args_m1(func, args) if arity == -1  # args[1] == 0 and (args[2] or args[3])
  return compile_args_m2(func, args) if arity == -2  # args[1] > 0 and (args[2] or args[3])

  # fixed arity = args[1]: VALUE func(VALUE self, VALUE local_2, VALUE local_3)
  args[1].times { |idx|
    arg = C::Variable.new("local_#{idx+2}", value)
    @scope.symbol[arg.name] = arg
    func.type.args << arg
  }
end
|
|
385
|
+
|
|
386
|
+
# update func prototype to reflect arity -1
# VALUE func(int argc, VALUE *argv, VALUE self)
# args is the :args AST node:
#  args[1] count of mandatory arguments, read from the front of argv
#  args[2] optional [:block, [:lasgn, id, default], ...] listing the arguments with a default value
#  args[3] optional [:lasgn, id] receiving the remaining arguments as an Array (*rest)
# raises Fail on an unexpected AST shape
def compile_args_m1(func, args)
  c = C::Variable.new("arg_c", C::BaseType.new(:int, :unsigned))
  v = C::Variable.new("arg_v", C::Pointer.new(value))
  @scope.symbol[c.name] = c
  @scope.symbol[v.name] = v
  func.type.args.unshift v
  func.type.args.unshift c

  args[1].times { |i|
    local(i+2, C::CExpression[v, :'[]', [i]])
  }

  if args[2]
    # [:block, [:lasgn, 2, [:lit, 4]]]
    raise Fail, "unhandled vararglist #{args.inspect}" if args[2][0] != :block
    args[2][1..-1].each_with_index { |a, i|
      raise Fail, "unhandled arg #{a.inspect}" if a[0] != :lasgn
      # optional arguments come after the mandatory ones in argv
      pos = args[1] + i
      cnd = C::CExpression[c, :>, pos]
      thn = C::CExpression[local(a[1], :none), :'=', [v, :'[]', [pos]]]
      # argument not supplied by the caller: evaluate the default value
      els = C::Block.new(@scope)
      ast_to_c(a, els, false)
      @scope.statements << C::If.new(cnd, thn, els)
    }
  end

  if args[3]
    raise Fail, "unhandled vararglist3 #{args.inspect}" if args[3][0] != :lasgn
    # args[2] is nil for a method with a splat but no optional argument (def foo(*a))
    optcount = args[2] ? args[2].length - 1 : 0
    skiplen = args[1] + optcount
    alloc = fcall('rb_ary_new4', [c, :-, [skiplen]], [v, :+, [skiplen]])
    local(args[3][1], C::CExpression[[c, :>, skiplen], :'?:', [alloc, fcall('rb_ary_new')]])
  end
end
|
|
420
|
+
|
|
421
|
+
# update func prototype to reflect arity -2
# VALUE func(VALUE self, VALUE arg_array)
# the arguments are shifted one by one out of the ruby Array 'arglist'
# args is the :args AST node (mandatory count, optional defaults block, optional rest lasgn)
# raises Fail on an unexpected AST shape
def compile_args_m2(func, args)
  arglist = C::Variable.new("arglist", value)
  @scope.symbol[arglist.name] = arglist
  func.type.args << arglist

  # mandatory arguments
  args[1].times { |i| local(i+2, fcall('rb_ary_shift', arglist)) }

  # populate arguments with default values
  if (defaults = args[2])
    # [:block, [:lasgn, 2, [:lit, 4]]]
    raise Fail, "unhandled vararglist #{args.inspect}" if defaults[0] != :block
    defaults[1..-1].each { |dflt|
      raise Fail, "unhandled arg #{dflt.inspect}" if dflt[0] != :lasgn
      # array not empty: the caller supplied the argument
      from_caller = C::CExpression[local(dflt[1], :none), :'=', fcall('rb_ary_shift', arglist)]
      # array empty: evaluate the default value
      from_default = C::Block.new(@scope)
      ast_to_c([:lasgn, dflt[1], dflt[2]], from_default, false)
      @scope.statements << C::If.new(rb_ary_len(arglist), from_caller, from_default)
    }
  end

  # whatever is left in the array is the *rest argument
  if (rest = args[3])
    raise Fail, "unhandled vararglist3 #{args.inspect}" if rest[0] != :lasgn
    local(rest[1], C::CExpression[arglist])
  end
end
|
|
450
|
+
|
|
451
|
+
# compile a case/when
# create a real C switch() for Fixnums, and put the others === in the default case
# XXX will get the wrong order for "case x; when 1; when Fixnum; when 3;" ...
# ast is the :case node, statements are appended to scope
# want_value is false, true, or the C::Variable that should receive the value of the case
# returns the variable holding the value of the case (or the tested value when no value is wanted)
# raises Fail on unsupported 'when' constructs or an empty case
def compile_case(ast, scope, want_value)
  # this generates
  # var = stuff_to_test()
  # if (var & 1)
  #   switch (var >> 1) {
  #      case 12:
  #          stuff();
  #          break;
  #      default:
  #          goto default_case;
  #   }
  # else
  # default_case:
  #   if (var == true.object_id || rb_test(rb_funcall(bla, '===', var)))
  #      foo();
  #   else {
  #      default();
  #   }
  #
  if want_value == true
    ret = get_new_tmp_var('case', want_value)
    want_value = ret
  elsif want_value
    ret = want_value
  end

  var = ast_to_c(ast[1], scope, want_value || true)
  if not var.kind_of? C::Variable
    ret ||= get_new_tmp_var('case', want_value)
    scope.statements << C::CExpression[ret, :'=', var]
    var = ret
  end

  # the scope to put all case int in
  body_int = C::Block.new(scope)
  # the scope to put the if (cs === var) cascade
  body_other_head = body_other = nil
  default = nil

  ast[2..-1].each { |cs|
    if cs[0] == :when
      raise Fail if cs[1][0] != :array

      # numeric case, add a case to body_int
      if cs[1][1..-1].all? { |cd| cd[0] == :lit and (cd[1].kind_of?(Fixnum) or cd[1].kind_of?(Range)) }
        cs[1][1..-1].each { |cd|
          if cd[1].kind_of? Range
            b = cd[1].begin
            e = cd[1].end
            e -= 1 if cd[1].exclude_end?
            raise Fail unless b.kind_of?(Integer) and e.kind_of?(Integer)
            body_int.statements << C::Case.new(b, e, nil)
          else
            body_int.statements << C::Case.new(cd[1], nil, nil)
          end
        }
        cb = C::Block.new(scope)
        v = ast_to_c(cs[2], cb, want_value)
        cb.statements << C::CExpression[ret, :'=', v] if want_value and v != ret
        cb.statements << C::Break.new
        body_int.statements << cb

      # non-numeric (or mixed) case, add if ( cs === var )
      else
        cnd = nil
        cs[1][1..-1].each { |cd|
          if (cd[0] == :lit and (cd[1].kind_of?(Fixnum) or cd[1].kind_of?(Symbol))) or
              [:nil, :true, :false].include?(cd[0])
            # true C equality
            cd = C::CExpression[var, :==, ast_to_c(cd, scope)]
          else
            # own block for ast_to_c to honor lazy evaluation
            tb = C::Block.new(scope)
            test = rb_test(rb_funcall(ast_to_c(cd, tb), '===', var), tb)
            # discard own block unless needed
            if tb.statements.empty?
              cd = test
            else
              tb.statements << test
              cd = C::CExpression[tb, value]
            end
          end
          cnd = (cnd ? C::CExpression[cnd, :'||', cd] : cd)
        }
        cb = C::Block.new(scope)
        v = ast_to_c(cs[2], cb, want_value)
        cb.statements << C::CExpression[ret, :'=', v] if want_value and v != ret

        fu = C::If.new(cnd, cb, nil)

        # chain to the previous test of the cascade
        if body_other
          body_other.belse = fu
        else
          body_other_head = fu
        end
        body_other = fu
      end

    # default case statement
    else
      cb = C::Block.new(scope)
      v = ast_to_c(cs, cb, want_value)
      cb.statements << C::CExpression[ret, :'=', v] if want_value and v != ret
      default = cb
    end
  }

  # if we use the value of the case, we must add an 'else: nil'
  if want_value and not default
    default = C::Block.new(scope)
    default.statements << C::CExpression[ret, :'=', rb_nil]
  end

  # assemble everything
  scope.statements <<
    if body_int.statements.empty?
      if body_other
        body_other.belse = default
        body_other_head
      else
        raise Fail, "empty case? #{ast.inspect}" if not default
        default
      end
    else
      # what to run when the switch does not match: the === cascade ending with
      # the default body, or the default body alone when all 'when' are numeric
      if body_other
        body_other.belse = default if default
        fallback = body_other_head
      else
        fallback = default
      end
      if fallback
        @default_label_cnt ||= 0
        dfl = "default_label_#{@default_label_cnt += 1}"
        fallback = C::Label.new(dfl, fallback)
        body_int.statements << C::Case.new('default', nil, C::Goto.new(dfl))
      end
      body_int = C::Switch.new(C::CExpression[var, :>>, 1], body_int)
      C::If.new(C::CExpression[var, :&, 1], body_int, fallback)
    end

  ret
end
|
|
591
|
+
|
|
592
|
+
# create a C::CExpr[toplevel.symbol[name], :funcall, args]
# casts int/strings in arglist to CExpr (by wrapping them in an Array)
# raises a RuntimeError if fname has no prototype in the C parser toplevel
def fcall(fname, *arglist)
  args = arglist.map { |arg|
    case arg
    when Integer, String then [arg]
    else arg
    end
  }
  proto = @cp.toplevel.symbol[fname]
  raise "need prototype for #{fname}!" if not proto
  C::CExpression[proto, :funcall, args]
end
|
|
600
|
+
|
|
601
|
+
# the VALUE typedef, as registered in the toplevel symbols of the C parser
def value
  toplevel_symbols = @cp.toplevel.symbol
  toplevel_symbols['VALUE']
end
|
|
605
|
+
|
|
606
|
+
# declare a new function variable of type VALUE in the function scope (@scope)
# no initializer if initializer == :none
# returns the new C::Variable
def declare_newvar(name, initializer)
  var = C::Variable.new(name, value)
  var.initializer = initializer unless initializer == :none
  @scope.symbol[var.name] = var
  @scope.statements << C::Declaration.new(var)
  var
end
|
|
615
|
+
|
|
616
|
+
# return a string suitable for use as a variable name
# hexencode any char not in [A-Za-z0-9_]
def escape_varname(n)
  n.gsub(/\W/) { |ch| ch.unpack('H*').first }
end
|
|
621
|
+
|
|
622
|
+
# retrieve or create a local var
# pass :none to avoid initializer, a nil initializer is replaced by the ruby nil value
# the name is escaped before the lookup in the function scope
def get_var(name, initializer=:none)
  name = escape_varname(name)
  known = @scope.symbol[name]
  return known if known
  @scope.symbol[name] = declare_newvar(name, initializer || rb_nil)
end
|
|
628
|
+
|
|
629
|
+
# create a new temporary variable
# when var is already a C::Variable it is returned as is (no new variable is created)
# base is an optional tag included in the generated name: tmp_<base>_<id>
# XXX put_var ?
def get_new_tmp_var(base=nil, var=nil)
  return var if var.kind_of? C::Variable
  @tmp_var_id ||= 0
  @tmp_var_id += 1
  tag = base ? "#{base}_" : ''
  get_var("tmp_#{tag}#{@tmp_var_id}")
end
|
|
636
|
+
|
|
637
|
+
# retrieve/create a new local variable (named local_<n>) with optional initializer
# pass :none to avoid initializer, the default (nil) initializes to the ruby nil value
def local(n, init=nil)
  get_var("local_#{n}", init)
end
|
|
641
|
+
|
|
642
|
+
# retrieve/create a new dynamic variable (block argument/variable), named dvar_<n>
# pass :none to avoid initializer, the default (nil) initializes to the ruby nil value
def dvar(n, init=nil)
  get_var("dvar_#{n}", init)
end
|
|
647
|
+
|
|
648
|
+
# retrieve self (1st func arg), looked up by name in the function scope
def rb_self
  scope_symbols = @scope.symbol
  scope_symbols['self']
end
|
|
652
|
+
|
|
653
|
+
# returns a CExpr casting expr to a VALUE* and indexing it: ((VALUE*)expr)[idx]
def rb_cast_pvalue(expr, idx)
  as_pvalue = [[expr], C::Pointer.new(value)]
  C::CExpression[as_pvalue, :'[]', [idx]]
end
|
|
657
|
+
|
|
658
|
+
# retrieve the current class, from self->klass (read as ((VALUE*)self)[1])
# XXX will segfault with self.kind_of? Fixnum/true/false/nil/sym
def rb_selfclass
  rb_cast_pvalue(rb_self, 1)
end
|
|
663
|
+
|
|
664
|
+
# the ruby nil, as a C expression: object_id of nil in the current interpreter cast to VALUE
def rb_nil
  C::CExpression[[nil.object_id], value]
end

# the ruby true, as a C expression: object_id of true in the current interpreter cast to VALUE
def rb_true
  C::CExpression[[true.object_id], value]
end

# the ruby false, as a C expression: object_id of false in the current interpreter cast to VALUE
def rb_false
  C::CExpression[[false.object_id], value]
end
|
|
673
|
+
|
|
674
|
+
# equivalent of calling rb_intern on a string
# no C call is generated: the numeric value of the symbol in the current interpreter is embedded
def rb_intern(n)
  # use the current interpreter's value
  sym_id = n.to_sym.to_i
  C::CExpression[sym_id]
end
|
|
679
|
+
|
|
680
|
+
# create a rb_funcall construct: rb_funcall(recv, intern(meth), argc, args...)
def rb_funcall(recv, meth, *args)
  argc = args.length
  fcall('rb_funcall', recv, rb_intern(meth), argc, *args)
end
|
|
684
|
+
|
|
685
|
+
# ruby bool test of a var
# returns a C expression that is true unless expr is the ruby false or nil
# when neither nil nor false has a null object_id, expr is first assigned to a temporary
# var (statement appended to scope), unless it is already a variable, and checked against false/nil
def rb_test(expr, scope)
  if nil.object_id == 0 or false.object_id == 0  # just to be sure
    # one of the two values is 0: a single masked comparison is enough
    nf = nil.object_id | false.object_id
    return C::CExpression[[expr, :|, nf], :'!=', nf]
  end

  tmp = expr
  if not expr.kind_of? C::Variable
    tmp = get_new_tmp_var('test')
    scope.statements << C::CExpression[tmp, :'=', expr]
  end
  C::CExpression[[tmp, :'!=', rb_nil], :'&&', [tmp, :'!=', rb_false]]
end
|
|
701
|
+
|
|
702
|
+
# generate C code to raise an exception with message 'reason'
# cls is the name of the C global holding the exception class, RuntimeError by default
def rb_raise(reason, cls='rb_eRuntimeError')
  exc_class = rb_global(cls)
  fcall('rb_raise', exc_class, reason)
end
|
|
706
|
+
|
|
707
|
+
# return a C expr equivalent to TYPE(expr) == type for non-immediate types
# generated test: expr > 7 && (expr & 3) == 0 && (((VALUE*)expr)[0] & 0x3f) == type
# XXX expr evaluated 3 times
def rb_test_class_type(expr, type)
  # rule out the small / non-aligned values before dereferencing expr
  is_pointer = [[expr, :>, [7]], :'&&', [[expr, :&, [3]], :==, [0]]]
  # compare the low bits of the first word of the object with the wanted type
  type_match = [[rb_cast_pvalue(expr, 0), :&, [0x3f]], :'==', [type]]
  C::CExpression[is_pointer, :'&&', type_match]
end
|
|
712
|
+
|
|
713
|
+
# return a C expr equivalent to TYPE(expr) == T_ARRAY (type value 9)
def rb_test_class_ary(expr)
  t_array = 9
  rb_test_class_type(expr, t_array)
end
|
|
717
|
+
# ARY_PTR(expr): word 4 of the object, cast to a VALUE*
# with idx, returns the expression of the idx-th element instead of the pointer
def rb_ary_ptr(expr, idx=nil)
  base = C::CExpression[[rb_cast_pvalue(expr, 4)], C::Pointer.new(value)]
  return base unless idx
  C::CExpression[base, :'[]', [idx]]
end
|
|
722
|
+
# ARY_LEN(expr): word 2 of the object
def rb_ary_len(expr)
  len_index = 2
  rb_cast_pvalue(expr, len_index)
end
|
|
726
|
+
|
|
727
|
+
# TYPE(expr) == T_STRING (type value 7)
def rb_test_class_string(expr)
  t_string = 7
  rb_test_class_type(expr, t_string)
end
|
|
731
|
+
# STR_PTR(expr): word 3 of the object, cast to a char*
# with idx, returns the expression of the idx-th char instead of the pointer
def rb_str_ptr(expr, idx=nil)
  base = C::CExpression[[rb_cast_pvalue(expr, 3)], C::Pointer.new(C::BaseType.new(:char))]
  return base unless idx
  C::CExpression[base, :'[]', [idx]]
end
|
|
736
|
+
# STR_LEN(expr): word 2 of the object
def rb_str_len(expr)
  len_index = 2
  rb_cast_pvalue(expr, len_index)
end
|
|
740
|
+
|
|
741
|
+
# return a C expr testing the type of expr against the type value 0xb
# (presumably T_HASH, by analogy with the array/string tests - confirm against ruby.h)
def rb_test_class_hash(expr)
  t_hash = 0xb
  rb_test_class_type(expr, t_hash)
end
|
|
744
|
+
|
|
745
|
+
# returns a static pointer to the constant
# the constant is resolved now, in the current interpreter, from owner
# (default: the scope of @cref defining constname), and its VALUE is embedded in the C code
# raises Fail if no owner is found
def rb_const(constname, owner = resolve_const_owner(constname))
  raise Fail, "no dynamic constant resolution #{constname}" if not owner
  addr = RubyHack.rb_obj_to_value(owner.const_get(constname))
  C::CExpression[[addr], value]
end
|
|
751
|
+
|
|
752
|
+
# compile a :masgn (parallel assignment)
#  ast[1] [:array, lhs...] assignment nodes, each with a nil value slot as last element
#  ast[2] the right hand side
#  ast[3] optional splat lhs (a, *b = foo), with a nil value slot as last element
# returns the variable holding the full rhs array, or nil.object_id when the
# optimized version (same number of lhs and rhs, value not used) could be used
# raises Fail on an unsupported ast
def rb_masgn(ast, scope, want_value)
  raise Fail, "masgn with no rhs #{ast.inspect}" if not ast[2]
  raise Fail, "masgn with no lhs array #{ast.inspect}" if not ast[1] or ast[1][0] != :array
  if not want_value and ast[2][0] == :array and not ast[3] and ast[2].length == ast[1].length
    rb_masgn_optimized(ast, scope)
    return nil.object_id
  end
  full = get_new_tmp_var('masgn', want_value)
  ary = ast_to_c(ast[2], scope, full)
  scope.statements << C::CExpression[full, :'=', ary] if full != ary
  ast[1][1..-1].each_with_index { |e, i|
    raise Fail, "weird masgn lhs #{e.inspect} in #{ast.inspect}" if e[-1] != nil
    # local_42 = full[i]
    e = e.dup
    e[-1] = [:rb2cstmt, rb_ary_ptr(full, i)]
    ast_to_c(e, scope, false)
  }
  if ast[3]
    # report the splat lhs itself ('e' only exists inside the block above)
    raise Fail, "weird masgn lhs #{ast[3].inspect} in #{ast.inspect}" if ast[3][-1] != nil
    # local_28 = full[12..-1].to_a
    e = ast[3].dup
    e[-1] = [:call, [:call, [:rb2cvar, full.name], '[]', [:array, [:dot2, [:lit, ast[1].length-1], [:lit, -1]]]], 'to_a']
    ast_to_c(e, scope, false)
  end

  full
end
|
|
780
|
+
|
|
781
|
+
# retrieve the C global named cname from the toplevel symbols of the C parser
# returns nil if the parser does not know it
def rb_global(cname)
  toplevel_symbols = @cp.toplevel.symbol
  toplevel_symbols[cname]
end
|
|
784
|
+
|
|
785
|
+
# compile an optimized :masgn with rhs.length == lhs.length (no need of a ruby array)
# every rhs is first evaluated into its own temporary variable, then
# each lhs is assigned from the matching temporary
# raises Fail if a lhs node already has a value
def rb_masgn_optimized(ast, scope)
  temps = ast[2][1..-1].map { |rhs|
    tmp = get_new_tmp_var('masgn_opt')
    val = ast_to_c(rhs, scope, tmp)
    scope.statements << C::CExpression[tmp, :'=', val] if tmp != val
    tmp
  }
  ast[1][1..-1].each { |lhs|
    tmp = temps.shift
    raise Fail, "weird masgn lhs #{lhs.inspect} in #{ast.inspect}" if lhs[-1] != nil
    # fill the empty value slot of the assignment node with the temporary
    target = lhs.dup
    target[-1] = [:rb2cvar, tmp.name]
    ast_to_c(target, scope, false)
  }
end
|
|
802
|
+
|
|
803
|
+
# the recursive AST to C compiler
# may append C statements to scope
# returns the C::CExpr holding the VALUE of the current ruby statement
# (nil if want_value is false, except for the optimize_iter/optimize_call shortcuts which return their own value)
# want_value is an optional hint as to the returned VALUE is needed or not
# if want_value is a C::Variable, the statements should try to populate this var instead of some random tmp var
# eg to simplify :if encoding unless we have 'foo = if 42;..'
# raises Fail on unsupported ast nodes
def ast_to_c(ast, scope, want_value = true)
  ret =
    case ast.to_a[0]
    when :block
      if ast[1]
        # only the last statement of the block gives its value
        ast[1..-2].each { |a| ast_to_c(a, scope, false) }
        ast_to_c(ast.last, scope, want_value)
      end

    when :lvar
      local(ast[1])
    when :lasgn
      if scope == @scope
        l = local(ast[1], :none)
      else
        # w = 4 if false ; p w  => should be nil
        l = local(ast[1])
      end
      st = ast_to_c(ast[2], scope, l)
      scope.statements << C::CExpression[l, :'=', st] if st != l
      l
    when :dvar
      dvar(ast[1])
    when :dasgn_curr
      l = dvar(ast[1])
      st = ast_to_c(ast[2], scope, l)
      scope.statements << C::CExpression[l, :'=', st] if st != l
      l
    when :ivar
      fcall('rb_ivar_get', rb_self, rb_intern(ast[1]))
    when :iasgn
      if want_value
        tmp = get_new_tmp_var("ivar_#{ast[1]}", want_value)
        scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)]
        scope.statements << fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), tmp)
        tmp
      else
        scope.statements << fcall('rb_ivar_set', rb_self, rb_intern(ast[1]), ast_to_c(ast[2], scope))
      end
    when :cvar
      fcall('rb_cvar_get', rb_selfclass, rb_intern(ast[1]))
    when :cvasgn
      if want_value
        tmp = get_new_tmp_var("cvar_#{ast[1]}", want_value)
        scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)]
        scope.statements << fcall('rb_cvar_set', rb_selfclass, rb_intern(ast[1]), tmp, rb_false)
        tmp
      else
        scope.statements << fcall('rb_cvar_set', rb_selfclass, rb_intern(ast[1]), ast_to_c(ast[2], scope), rb_false)
      end
    when :gvar
      fcall('rb_gv_get', ast[1])
    when :gasgn
      if want_value
        tmp = get_new_tmp_var("gvar_#{ast[1]}", want_value)
        scope.statements << C::CExpression[tmp, :'=', ast_to_c(ast[2], scope)]
        scope.statements << fcall('rb_gv_set', ast[1], tmp)
        tmp
      else
        scope.statements << fcall('rb_gv_set', ast[1], ast_to_c(ast[2], scope))
      end
    when :attrasgn  # foo.bar= 42 (same as :call, except for return value)
      recv = ast_to_c(ast[1], scope)
      raise Fail, "unsupported #{ast.inspect}" if not ast[3] or ast[3][0] != :array
      if ast[3].length != 2
        if ast[2] != '[]=' or ast[3].length != 3
          raise Fail, "unsupported #{ast.inspect}"
        end
        # foo[4] = 2
        idx = ast_to_c(ast[3][1], scope)
      end
      arg = ast_to_c(ast[3].last, scope)
      if want_value
        # the value of the expression is the assigned value, not the value returned by the call
        tmp = get_new_tmp_var('call', want_value)
        scope.statements << C::CExpression[tmp, :'=', arg]
      end
      if idx
        scope.statements << rb_funcall(recv, ast[2], idx, arg)
      else
        scope.statements << rb_funcall(recv, ast[2], arg)
      end
      tmp

    when :rb2cvar  # hax, used in vararg parsing
      get_var(ast[1])
    when :rb2cstmt
      ast[1]

    when :block_arg
      local(ast[3], fcall('rb_block_proc'))

    when :lit
      case ast[1]
      when Symbol
        # XXX ID2SYM
        C::CExpression[[rb_intern(ast[1].to_s), :<<, 8], :|, 0xe]
      when Range
        # rb_range_new(beg, end, exclude_end): last arg is true for an exclusive range (a...b)
        fcall('rb_range_new', ast[1].begin.object_id, ast[1].end.object_id, ast[1].exclude_end? ? 1 : 0)
      else  # true/false/nil/fixnum
        ast[1].object_id
      end
    when :self
      rb_self
    when :str
      fcall('rb_str_new2', ast[1])
    when :array
      tmp = get_new_tmp_var('ary', want_value)
      scope.statements << C::CExpression[tmp, :'=', fcall('rb_ary_new')]
      ast[1..-1].each { |e|
        scope.statements << fcall('rb_ary_push', tmp, ast_to_c(e, scope))
      }
      tmp
    when :hash
      raise Fail, "bad #{ast.inspect}" if ast[1][0] != :array
      tmp = get_new_tmp_var('hash', want_value)
      scope.statements << C::CExpression[tmp, :'=', fcall('rb_hash_new')]
      # the array alternates keys and values
      ki = nil
      ast[1][1..-1].each { |k|
        if not ki
          ki = k
        else
          scope.statements << fcall('rb_hash_aset', tmp, ast_to_c(ki, scope), ast_to_c(k, scope))
          ki = nil
        end
      }
      tmp

    when :iter
      if v = optimize_iter(ast, scope, want_value)
        return v
      end
      # for full support of :iter, we need access to the interpreter's ruby_block private global variable in eval.c
      # we can find it by analysing rb_block_given_p, but this won't work with a static precompiled rubyhack...
      # even with access to ruby_block, there we would need to redo PUSH_BLOCK, create a temporary dvar list,
      # handle [:break, lol], and do all the stack magic reused in rb_yield (probably incl setjmp etc)
      raise Fail, "unsupported iter #{ast[3].inspect} { | #{ast[1].inspect} | #{ast[2].inspect} }"

    when :call, :vcall, :fcall
      if v = optimize_call(ast, scope, want_value)
        return v
      end
      recv = ((ast[0] == :call) ? ast_to_c(ast[1], scope) : rb_self)
      if not ast[3]
        f = rb_funcall(recv, ast[2])
      elsif ast[3][0] == :array
        args = ast[3][1..-1].map { |a| ast_to_c(a, scope) }
        f = rb_funcall(recv, ast[2], *args)
      elsif ast[3][0] == :splat
        args = ast_to_c(ast[3], scope)
        if not args.kind_of? C::Variable
          tmp = get_new_tmp_var('args', want_value)
          scope.statements << C::CExpression[tmp, :'=', args]
          args = tmp
        end
        f = fcall('rb_funcall3', recv, rb_intern(ast[2]), rb_ary_len(args), rb_ary_ptr(args))
      # elsif ast[3][0] == :argscat
      else
        raise Fail, "unsupported #{ast.inspect}"
      end
      if want_value
        tmp ||= get_new_tmp_var('call', want_value)
        scope.statements << C::CExpression[tmp, :'=', f]
        tmp
      else
        scope.statements << f
        f
      end

    when :if, :when
      if ast[0] == :when and ast[1][0] == :array
        cnd = nil
        ast[1][1..-1].map { |cd| rb_test(ast_to_c(cd, scope), scope) }.each { |cd|
          cnd = (cnd ? C::CExpression[cnd, :'||', cd] : cd)
        }
      else
        cnd = rb_test(ast_to_c(ast[1], scope), scope)
      end

      tbdy = C::Block.new(scope)
      ebdy = C::Block.new(scope) if ast[3] or want_value

      if want_value
        tmp = get_new_tmp_var('if', want_value)
        thn = ast_to_c(ast[2], tbdy, tmp)
        tbdy.statements << C::CExpression[tmp, :'=', thn] if tmp != thn
        if ast[3]
          els = ast_to_c(ast[3], ebdy, tmp)
        else
          # foo = if bar ; baz ; end => nil if !bar
          els = rb_nil
        end
        ebdy.statements << C::CExpression[tmp, :'=', els] if tmp != els
      else
        ast_to_c(ast[2], tbdy, false)
        ast_to_c(ast[3], ebdy, false)
      end

      scope.statements << C::If.new(cnd, tbdy, ebdy)

      tmp

    when :while, :until
      pib = @iter_break
      @iter_break = nil  # XXX foo = while ()...

      # compiled as an endless for(;;) with an explicit break on the condition
      body = C::Block.new(scope)
      if ast[3] == 0  # do .. while();
        ast_to_c(ast[2], body, false)
      end
      t = nil
      e = C::Break.new
      t, e = e, t if ast[0] == :until
      body.statements << C::If.new(rb_test(ast_to_c(ast[1], body), body), t, e)
      if ast[3] != 0  # do .. while();
        ast_to_c(ast[2], body, false)
      end
      scope.statements << C::For.new(nil, nil, nil, body)

      @iter_break = pib
      nil.object_id

    when :and, :or, :not
      # beware lazy evaluation !
      tmp = get_new_tmp_var('and', want_value)
      v1 = ast_to_c(ast[1], scope, tmp)
      # and/or need that tmp has the actual v1 value (returned when shortcircuit)
      scope.statements << C::CExpression[tmp, :'=', v1] if v1 != tmp
      v1 = tmp
      case ast[0]
      when :and
        t = C::Block.new(scope)
        v2 = ast_to_c(ast[2], t, tmp)
        t.statements << C::CExpression[tmp, :'=', v2] if v2 != tmp
      when :or
        e = C::Block.new(scope)
        v2 = ast_to_c(ast[2], e, tmp)
        e.statements << C::CExpression[tmp, :'=', v2] if v2 != tmp
      when :not
        t = C::CExpression[tmp, :'=', rb_false]
        e = C::CExpression[tmp, :'=', rb_true]
      end
      scope.statements << C::If.new(rb_test(v1, scope), t, e)
      tmp
    when :return
      scope.statements << C::Return.new(ast_to_c(ast[1], scope))
      nil.object_id
    when :break
      if @iter_break
        v = (ast[1] ? ast_to_c(ast[1], scope, @iter_break) : nil.object_id)
        scope.statements << C::CExpression[@iter_break, :'=', [[v], value]] if @iter_break != v
      end
      scope.statements << C::Break.new
      nil.object_id

    when nil, :args
      nil.object_id
    when :nil
      rb_nil
    when :false
      rb_false
    when :true
      rb_true
    when :const
      rb_const(ast[1])
    when :colon2
      if cst = check_const(ast[1])
        rb_const(ast[2], cst)
      else
        fcall('rb_const_get', ast_to_c(ast[1], scope), rb_intern(ast[2]))
      end
    when :colon3
      rb_const(ast[1], ::Object)
    when :defined
      case ast[1][0]
      when :ivar
        fcall('rb_ivar_defined', rb_self, rb_intern(ast[1][1]))
      else
        raise Fail, "unsupported #{ast.inspect}"
      end
    when :masgn
      # parallel assignment: put everything in an Array, then pop everything back?
      rb_masgn(ast, scope, want_value)

    when :evstr
      fcall('rb_obj_as_string', ast_to_c(ast[1], scope))
    when :dot2, :dot3
      fcall('rb_range_new', ast_to_c(ast[1], scope), ast_to_c(ast[2], scope), ast[0] == :dot2 ? 0 : 1)
    when :splat
      fcall('rb_Array', ast_to_c(ast[1], scope))
    when :to_ary
      fcall('rb_ary_to_ary', ast_to_c(ast[1], scope))
    when :dstr
      # dynamic string: "foo#{bar}baz"
      tmp = get_new_tmp_var('dstr')
      scope.statements << C::CExpression[tmp, :'=', fcall('rb_str_new2', ast[1][1])]
      ast[2..-1].compact.each { |s|
        if s[0] == :str  # directly append the char*
          scope.statements << fcall('rb_str_cat2', tmp, s[1])
        else
          scope.statements << fcall('rb_str_append', tmp, ast_to_c(s, scope))
        end
      }
      tmp
    when :case
      compile_case(ast, scope, want_value)
    when :ensure
      # TODO
      # for now the ensure body is simply compiled after the protected body
      val = ast_to_c(ast[1], scope, want_value)
      ast_to_c(ast[3], scope, false)
      val
    else
      raise Fail, "unsupported #{ast.inspect}"
    end

  if want_value
    # raw ints/strings returned by the branches above are cast to VALUE
    ret = C::CExpression[[ret], value] if ret.kind_of?(Integer) or ret.kind_of?(String)
    ret
  end
end
|
|
1128
|
+
|
|
1129
|
+
# Optional optimization of a method call (eg a == 1, c+2, ary << x, ary[i], ...).
# ast is the call node: ast[1] is the receiver, ast[2] the method name and
# ast[3] the argument list (only an :array node is looked into).
# The generated statements are appended to scope.
# Returns nil when the caller should emit a normal rb_funcall, otherwise the
# C expression/variable to use as the value of the call.
def optimize_call(ast, scope, want_value)
  cexpr = C::CExpression
  mname = ast[2]
  sval = C::BaseType.new(:ptr)  # signed VALUE
  arglist = ast[3]
  args = arglist[1..-1] if arglist && arglist[0] == :array
  arg0 = args[0] if args

  # make sure a value lives in a C variable: returns v_expr itself when it is
  # already a C::Variable, else emits 'v_dst = v_expr' and returns v_dst
  spill = lambda { |v_expr, v_dst|
    if v_expr.kind_of?(C::Variable)
      v_expr
    else
      scope.statements << cexpr[v_dst, :'=', v_expr]
      v_dst
    end
  }

  if arg0 && arg0[0] == :lit && arg0[1].kind_of?(Fixnum)
    # optimize 'x==42', 'x+42', 'x-42'
    lit = arg0[1]
    return unless %w[== > < >= <= + -].include?(mname)
    if lit < 0 && %w[+ -].include?(mname)
      # need lit >= 0 for overflow detection: turn 'x + -n' into 'x - n' and vice versa
      mname = (mname == '+' ? '-' : '+')
      lit = -lit
      return unless lit.kind_of?(Fixnum)  # -0x40000000
    end

    # object_id of the literal, used below as its raw VALUE
    # (presumably relies on the MRI Fixnum tagging scheme -- confirm for the target interpreter)
    lit_v = lit.object_id
    recv = ast_to_c(ast[1], scope)
    res = get_new_tmp_var('opt', want_value)
    recv = spill[recv, res]

    case mname
    when '=='
      # XXX assume == only return true for full equality: if not Fixnum, then always false
      # which breaks 1.0 == 1 and maybe others, but its ok
      scope.statements << C::If.new(cexpr[recv, :'==', [lit_v]], cexpr[res, :'=', rb_true], cexpr[res, :'=', rb_false])
    when '>', '<', '>=', '<='
      # do the actual comparison on signed >>1 if both Fixnum
      fast = C::If.new(
        cexpr[[[[recv], sval], :>>, [1]], mname.to_sym, [[[lit_v], sval], :>>, [1]]],
        cexpr[res, :'=', rb_true],
        cexpr[res, :'=', rb_false])
      # fallback to actual rb_funcall
      slow = cexpr[res, :'=', rb_funcall(recv, mname, lit.object_id)]
      add_optimized_statement scope, ast[1], recv, 'fixnum' => fast, 'other' => slow
    when '+', '-'
      arith = cexpr[recv, mname.to_sym, [lit_v-1]]  # overflow to Bignum ?
      # the result is only trusted when it moved in the expected direction
      # (lit >= 0 here): above the receiver for '+', below it for '-'
      direction = (mname == '+' ? :< : :>)
      cnd = cexpr[[recv, :&, [1]], :'&&', [[[recv], sval], direction, [[arith], sval]]]
      fast = cexpr[res, :'=', arith]
      slow = cexpr[res, :'=', rb_funcall(recv, mname, lit.object_id)]
      if @optim_hint[ast[1]] == 'fixnum'
        # add_optimized_statement wont handle the overflow check correctly
        scope.statements << fast
      else
        scope.statements << C::If.new(cnd, fast, slow)
      end
    end
    res

  # Symbol#==
  elsif arg0 && arg0[0] == :lit && arg0[1].kind_of?(Symbol) && mname == '=='
    sym_v = ast_to_c(arg0, scope)
    res = get_new_tmp_var('opt', want_value)
    recv = spill[ast_to_c(ast[1], scope, res), res]

    scope.statements << C::If.new(cexpr[recv, :'==', [sym_v]], cexpr[res, :'=', rb_true], cexpr[res, :'=', rb_false])
    res

  elsif arg0 && mname == '<<'
    res = get_new_tmp_var('opt', want_value)
    recv = ast_to_c(ast[1], scope, res)
    arg = ast_to_c(arg0, scope)
    # here the receiver is always copied into res (even when it is another variable)
    unless recv == res
      scope.statements << cexpr[res, :'=', recv]
      recv = res
    end

    push = fcall('rb_ary_push', recv, arg)
    concat = fcall('rb_str_concat', recv, arg)
    generic = rb_funcall(recv, mname, arg)
    generic = cexpr[res, :'=', generic] if want_value

    add_optimized_statement scope, ast[1], recv, 'ary' => push, 'string' => concat, 'other' => generic
    res

  elsif arg0 && args.length == 1 && mname == '[]'
    return if ast[1][0] == :const  # Expression[42]
    res = get_new_tmp_var('opt', want_value)
    recv = spill[ast_to_c(ast[1], scope, res), res]

    idx = get_new_tmp_var('idx')
    arg = spill[ast_to_c(arg0, scope, idx), idx]
    # from now on idx is the tmp variable seen through a signed cast
    idx = cexpr[[idx], sval]

    # Array receiver: untag the index, wrap negative indexes, nil when out of bounds
    ary_blk = C::Block.new(scope)
    ary_blk.statements << cexpr[idx, :'=', [[[arg], sval], :>>, [1]]]
    ary_blk.statements << C::If.new(cexpr[idx, :<, [0]], cexpr[idx, :'=', [idx, :+, rb_ary_len(recv)]], nil)
    ary_blk.statements << C::If.new(cexpr[[idx, :<, [0]], :'||', [idx, :>=, [[rb_ary_len(recv)], sval]]],
      cexpr[res, :'=', rb_nil],
      cexpr[res, :'=', rb_ary_ptr(recv, idx)])

    # String receiver: same bound handling, the byte found is returned as (byte << 1) | 1
    str_blk = C::Block.new(scope)
    str_blk.statements << cexpr[idx, :'=', [[[arg], sval], :>>, [1]]]
    str_blk.statements << C::If.new(cexpr[idx, :<, [0]], cexpr[idx, :'=', [idx, :+, rb_str_len(recv)]], nil)
    str_blk.statements << C::If.new(cexpr[[idx, :<, [0]], :'||', [idx, :>=, [[rb_str_len(recv)], sval]]],
      cexpr[res, :'=', rb_nil],
      cexpr[res, :'=', [[[[rb_str_ptr(recv, idx), :&, [0xff]], :<<, [1]], :|, [1]], value]])

    hash_case = cexpr[res, :'=', fcall('rb_hash_aref', recv, arg)]
    generic = cexpr[res, :'=', rb_funcall(recv, mname, arg)]

    # ary/string only valid with fixnum argument !
    add_optimized_statement scope, ast[1], recv, 'hash' => hash_case, 'other' => generic,
      'ary_bnd' => cexpr[res, :'=', rb_ary_ptr(recv, cexpr[[[arg], sval], :>>, [1]])],
      cexpr[[arg, :&, 1], :'&&', rb_test_class_ary(recv)] => ary_blk,
      cexpr[[arg, :&, 1], :'&&', rb_test_class_string(recv)] => str_blk
    res

  elsif ast[1] && !arg0 && mname == 'empty?'
    res = get_new_tmp_var('opt', want_value)
    recv = spill[ast_to_c(ast[1], scope, res), res]

    ary_case = C::If.new(rb_ary_len(recv), cexpr[res, :'=', rb_false], cexpr[res, :'=', rb_true])

    add_optimized_statement scope, ast[1], recv, 'ary' => ary_case,
      'other' => cexpr[res, :'=', rb_funcall(recv, mname)]
    res

  elsif ast[1] && !arg0 && mname == 'pop'
    res = get_new_tmp_var('opt', want_value)
    recv = spill[ast_to_c(ast[1], scope, res), res]

    fast = fcall('rb_ary_pop', recv)
    slow = rb_funcall(recv, mname)
    if want_value
      fast = cexpr[res, :'=', fast]
      slow = cexpr[res, :'=', slow]
    end

    add_optimized_statement scope, ast[1], recv, 'ary' => fast, 'other' => slow

    res

  elsif ast[1] && mname == 'kind_of?' && arg0 && (arg0[0] == :const || arg0[0] == :colon3)
    # TODO check const maps to toplevel when :const
    res = nil
    test =
      case arg0[1]
      when 'Symbol'
        res = get_new_tmp_var('kindof', want_value)
        cexpr[[ast_to_c(ast[1], scope, res), :'&', [0xf]], :'==', [0xe]]
      #when 'Numeric', 'Integer'
      when 'Fixnum'
        res = get_new_tmp_var('kindof', want_value)
        cexpr[ast_to_c(ast[1], scope, res), :'&', [0x1]]
      when 'Array'
        rb_test_class_ary(ast_to_c(ast[1], scope))
      when 'String'
        rb_test_class_string(ast_to_c(ast[1], scope))
      else return
      end
    puts "shortcut may be incorrect for #{ast.inspect}" if arg0[0] == :const
    # the Array/String arms above did not allocate a result variable
    res ||= get_new_tmp_var('kindof', want_value)
    scope.statements << C::If.new(test, cexpr[res, :'=', rb_true], cexpr[res, :'=', rb_false])
    res

  elsif !ast[1] || ast[1] == [:self]
    # receiver-less call, or explicit self: try to hardlink the target
    optimize_call_static(ast, scope, want_value)
  end
end
|
|
1323
|
+
|
|
1324
|
+
# Append to scope the statement implementing an optimized operation on a variable.
# varid is the key looked up in @optim_hint, varc the C expression holding the variable.
# optim maps a variable class ('ary', 'ary_bnd', 'hash', 'string', 'fixnum', 'other')
# to the C statement to run for that class; a key may also be a C::Statement
# meant to be used directly as the If condition.
# if optim['ary'] == optim['ary_bnd'], you can omit the latter
# optim must have an 'other' key, which calls the generic ruby method
#
# With a hint matching a key of optim, only that statement is emitted.
# With a hint that has no matching key, only 'other' is emitted.
# With no hint at all (and only String keys), a runtime class-test cascade is
# generated, ending with 'other'.
def add_optimized_statement(scope, varid, varc, optim={})
  hint = @optim_hint[varid]
  hint = 'ary' if hint == 'ary_bnd' && !optim['ary_bnd']

  stmt = optim[hint]
  unless stmt
    stmt = optim['other']
    # no need to cascade if we have a hash and can optim ary only
    if !hint && optim.keys.all? { |key| key.kind_of?(String) }
      # wrap the current statement in one more If per known class
      optim.each { |key, code|
        case key
        when 'other', 'fixnum'
          # 'other' is already the innermost else, 'fixnum' is added last (below)
        when 'ary'
          stmt = C::If.new(rb_test_class_ary(varc), code, stmt)
        when 'hash'
          stmt = C::If.new(rb_test_class_hash(varc), code, stmt)
        when 'string'
          stmt = C::If.new(rb_test_class_string(varc), code, stmt)
        when C::Statement
          # NOTE(review): the guard above only lets String keys through, so this
          # arm looks unreachable -- confirm whether that is intended
          stmt = C::If.new(key, code, stmt)
        end
      }
      fixnum_code = optim['fixnum']
      if fixnum_code
        # first test to perform (fast path)
        stmt = C::If.new(C::CExpression[varc, :&, 1], fixnum_code, stmt)
      end
    end
  end

  scope.statements << stmt
end
|
|
1354
|
+
|
|
1355
|
+
# Returns a CExpr pointing to the C func implementing klass#method
# (the singleton method klass.method when singleton is set).
# The expression is the raw function address cast to a pointer to a C function
# whose prototype is built from the arity recorded in the method node:
#   -1 => (int, VALUE*, VALUE), -2 => (VALUE, VALUE), n => n+1 VALUE arguments.
# Returns nil when the method node pointer is 0 or the node is not a :cfunc.
def get_cfuncptr(klass, method, singleton=false)
  owner = klass
  owner = (class << klass ; self ; end) if singleton

  node_ptr = RubyHack.get_method_node_ptr(owner, method)
  return if node_ptr == 0

  # the node type is stored in bits 11..18 of the first word of the node
  node_type = RubyHack::NODETYPE[(RubyHack.memory_read_int(node_ptr) >> 11) & 0xff]
  return unless node_type == :cfunc

  node = RubyHack.read_node(node_ptr)
  arity = node[1][:arity]
  fptr = node[1][:fptr]

  proto = C::Function.new(value, [])
  anon = lambda { |ctype| C::Variable.new(nil, ctype) }
  if arity == -1
    proto.args << anon[C::BaseType.new(:int)] << anon[C::Pointer.new(value)] << anon[value]
  elsif arity == -2
    proto.args << anon[value] << anon[value]
  else
    (arity+1).times { proto.args << anon[value] }
  end

  C::CExpression[[fptr], C::Pointer.new(proto)]
end
|
|
1376
|
+
|
|
1377
|
+
# call C funcs directly
# assume private function calls are not virtual and hardlink them here
#
# ast is a call node without receiver (or on self): ast[2] is the method name,
# ast[3] the argument list (nil, an :array node or a :splat node).
# The C argument list depends on the arity reported by method_arity:
#   -2 => (self, args_as_ruby_array)
#   -1 => (argc, argv, self)
#    n => (self, arg0, ... argn-1)
# Returns nil when the call cannot be hardlinked, so that the caller emits a
# regular rb_funcall: unknown arity, target is not a C function, unsupported
# argument node, or a literal argument count that does not match a fixed arity.
# Otherwise the call is appended to scope; the return value is the tmp variable
# receiving the result when want_value is set, else the (non-nil) result of the append.
def optimize_call_static(ast, scope, want_value)
  begin
    arity = method_arity(ast[2])
  rescue
    # no arity information: leave the call to the generic path
    return
  end

  if ast[2].to_s == @meth.to_s
    # self is recursive
    fptr = @cur_cfunc
  else
    fptr = get_cfuncptr(@klass, ast[2], @meth_singleton)
    return if not fptr
  end

  c_arglist = []

  if not ast[3]
    args = []
  elsif ast[3][0] == :array
    args = ast[3][1..-1]
  elsif ast[3][0] == :splat
    # the arguments are only known at runtime, as a ruby Array
    args = ast_to_c(ast[3], scope)
    if arity != -2 and !args.kind_of?(C::Variable)
      # the array is referenced more than once below: store it in a variable
      tmp = get_new_tmp_var('arg')
      scope.statements << C::CExpression[tmp, :'=', args]
      args = tmp
    end
    case arity
    when -2
      c_arglist << rb_self << args
    when -1
      c_arglist << [rb_ary_len(args)] << rb_ary_ptr(args) << rb_self
    else
      # fixed arity: check the array length at runtime
      cnd = C::CExpression[rb_ary_len(args), :'!=', [arity]]
      scope.statements << C::If.new(cnd, rb_raise("#{arity} args expected", 'rb_eArgumentError'), nil)

      c_arglist << rb_self
      arity.times { |i| c_arglist << rb_ary_ptr(args, i) }
    end
    # c_arglist is complete, skip the literal argument handling below
    arity = :canttouchthis
  else return  # TODO
  end

  case arity
  when :canttouchthis
  when -2
    # pack the arguments in a new ruby Array
    arg = get_new_tmp_var('arg')
    scope.statements << C::CExpression[arg, :'=', fcall('rb_ary_new')]
    args.each { |a|
      scope.statements << fcall('rb_ary_push', arg, ast_to_c(a, scope))
    }
    c_arglist << rb_self << arg

  when -1
    # build argv: null pointer, address of a single value, or a local C array
    case args.length
    when 0
      argv = C::CExpression[[0], C::Pointer.new(value)]
    when 1
      val = ast_to_c(args[0], scope)
      if not val.kind_of? C::Variable
        argv = get_new_tmp_var('argv')
        scope.statements << C::CExpression[argv, :'=', val]
        val = argv
      end
      argv = C::CExpression[:'&', val]
    else
      argv = get_new_tmp_var('argv')
      argv.type = C::Array.new(value, args.length)
      args.each_with_index { |a, i|
        val = ast_to_c(a, scope)
        scope.statements << C::CExpression[[argv, :'[]', [i]], :'=', val]
      }
    end
    c_arglist << [args.length] << argv << rb_self

  else
    # fixed arity: the C function is called with exactly one C argument per
    # ruby argument, so a wrong argument count must not be hardlinked.
    # Nothing has been emitted yet, let the generic call path handle it.
    return if args.length != arity

    c_arglist << rb_self
    args.each { |a|
      va = get_new_tmp_var('arg')
      val = ast_to_c(a, scope, va)
      scope.statements << C::CExpression[va, :'=', val] if val != va
      c_arglist << va
    }
  end

  f = C::CExpression[fptr, :funcall, c_arglist]
  if want_value
    ret = get_new_tmp_var('ccall', want_value)
    scope.statements << C::CExpression[ret, :'=', f]
    ret
  else
    scope.statements << f
  end
end
|
|
1469
|
+
|
|
1470
|
+
# Try to compile a method call with a literal block into a plain C loop.
# ast[1] holds the block arguments, ast[2] the block body, ast[3] the call
# receiving the block.
# Handled: 'loop { }', 'x.times { |i| }', and 'each' / 'find' / 'map' with a single
# block variable (the generated code raises at runtime if the receiver is not a
# Fixnum, resp. an Array). 'reverse_each' and 'each_key' are first rewritten as
# 'reverse.each' and 'keys.each'.
# While the block body is compiled, @iter_break holds the variable receiving the
# value of the whole expression (nil when want_value is not set); the previous
# @iter_break is restored on exit.
# Returns nil if the iterator is not handled; otherwise the result variable when
# want_value is set, and nil.object_id when it is not.
def optimize_iter(ast, scope, want_value)
  b_args, b_body, b_recv = ast[1, 3]

  old_ib = @iter_break
  # a new tmpvar, so we can overwrite it in 'break :foo'
  @iter_break = (want_value ? get_new_tmp_var('iterbreak') : nil)

  begin
    if b_recv[0] == :call and b_recv[2] == 'reverse_each'
      # convert ary.reverse_each to ary.reverse.each
      b_recv = b_recv.dup
      b_recv[1] = [:call, b_recv[1], 'reverse']
      b_recv[2] = 'each'
    elsif b_recv[0] == :call and b_recv[2] == 'each_key'
      # convert hash.each_key to hash.keys.each
      b_recv = b_recv.dup
      b_recv[1] = [:call, b_recv[1], 'keys']
      b_recv[2] = 'each'
    end

    # loop { }
    if b_recv[0] == :fcall and b_recv[2] == 'loop'
      body = C::Block.new(scope)
      ast_to_c(b_body, body, false)
      scope.statements << C::For.new(nil, nil, nil, body)

    # int.times { |i| }
    elsif b_recv[0] == :call and not b_recv[3] and b_recv[2] == 'times'
      limit = get_new_tmp_var('limit')
      recv = ast_to_c(b_recv[1], scope, limit)
      scope.statements << C::If.new(C::CExpression[:'!', [recv, :&, 1]], rb_raise('only Fixnum#times handled'), nil)
      if want_value
        # the value of the expression is the receiver itself
        scope.statements << C::CExpression[@iter_break, :'=', recv]
      end
      scope.statements << C::CExpression[limit, :'=', [recv, :>>, 1]]
      cntr = get_new_tmp_var('cntr')
      cntr.type = C::BaseType.new(:int, :unsigned)
      body = C::Block.new(scope)
      if b_args and b_args[0] == :dasgn_curr
        # block variable = counter converted back to a tagged value
        body.statements << C::CExpression[dvar(b_args[1]), :'=', [[cntr, :<<, 1], :|, 1]]
      end
      ast_to_c(b_body, body, false)
      scope.statements << C::For.new(C::CExpression[cntr, :'=', [[0], cntr.type]], C::CExpression[cntr, :<, limit], C::CExpression[:'++', cntr], body)

    # ary.each { |e| }
    elsif b_recv[0] == :call and not b_recv[3] and b_recv[2] == 'each' and b_args and
        b_args[0] == :dasgn_curr
      ary = get_new_tmp_var('ary')
      recv = ast_to_c(b_recv[1], scope, ary)
      scope.statements << C::CExpression[ary, :'=', recv] if ary != recv
      scope.statements << C::If.new(rb_test_class_ary(ary), nil, rb_raise('only Array#each { |e| } handled'))
      if want_value
        scope.statements << C::CExpression[@iter_break, :'=', ary]
      end
      cntr = get_new_tmp_var('cntr')
      cntr.type = C::BaseType.new(:int, :unsigned)
      body = C::Block.new(scope)
      body.statements << C::CExpression[dvar(b_args[1]), :'=', [rb_ary_ptr(ary), :'[]', [cntr]]]
      ast_to_c(b_body, body, false)
      scope.statements << C::For.new(C::CExpression[cntr, :'=', [[0], cntr.type]], C::CExpression[cntr, :<, rb_ary_len(ary)], C::CExpression[:'++', cntr], body)

    # ary.find { |e| }
    elsif b_recv[0] == :call and not b_recv[3] and b_recv[2] == 'find' and b_args and
        b_args[0] == :dasgn_curr
      ary = get_new_tmp_var('ary')
      recv = ast_to_c(b_recv[1], scope, ary)
      scope.statements << C::CExpression[ary, :'=', recv] if ary != recv
      scope.statements << C::If.new(rb_test_class_ary(ary), nil, rb_raise('only Array#find { |e| } handled'))
      if want_value
        # default retval when nothing is found
        scope.statements << C::CExpression[@iter_break, :'=', rb_nil]
      end
      cntr = get_new_tmp_var('cntr')
      cntr.type = C::BaseType.new(:int, :unsigned)
      body = C::Block.new(scope)
      body.statements << C::CExpression[dvar(b_args[1]), :'=', [rb_ary_ptr(ary), :'[]', [cntr]]]
      # same as #each up to this point (except default retval), now add a 'if (body_value) break ary[cntr];'
      # XXX 'find { next true }'

      found = ast_to_c(b_body, body)
      t = C::Block.new(body)
      # there is no result variable to store the element into when the value is unused
      t.statements << C::CExpression[@iter_break, :'=', rb_ary_ptr(ary, cntr)] if want_value
      t.statements << C::Break.new
      body.statements << C::If.new(rb_test(found, body), t, nil)

      scope.statements << C::For.new(C::CExpression[cntr, :'=', [[0], cntr.type]], C::CExpression[cntr, :<, rb_ary_len(ary)], C::CExpression[:'++', cntr], body)

    # ary.map { |e| }
    elsif b_recv[0] == :call and not b_recv[3] and b_recv[2] == 'map' and b_args and
        b_args[0] == :dasgn_curr
      ary = get_new_tmp_var('ary')
      recv = ast_to_c(b_recv[1], scope, ary)
      scope.statements << C::CExpression[ary, :'=', recv] if ary != recv
      scope.statements << C::If.new(rb_test_class_ary(ary), nil, rb_raise('only Array#map { |e| } handled'))
      if want_value
        scope.statements << C::CExpression[@iter_break, :'=', fcall('rb_ary_new')]
      end
      cntr = get_new_tmp_var('cntr')
      cntr.type = C::BaseType.new(:int, :unsigned)
      body = C::Block.new(scope)
      body.statements << C::CExpression[dvar(b_args[1]), :'=', [rb_ary_ptr(ary), :'[]', [cntr]]]
      # same as #each up to this point (except default retval), now add a '@iter_break << body_value'
      # XXX 'next' unhandled

      if want_value
        val = ast_to_c(b_body, body)
        body.statements << fcall('rb_ary_push', @iter_break, val)
      else
        # no result array was allocated: only run the block body, as #each does
        ast_to_c(b_body, body, false)
      end

      scope.statements << C::For.new(C::CExpression[cntr, :'=', [[0], cntr.type]], C::CExpression[cntr, :<, rb_ary_len(ary)], C::CExpression[:'++', cntr], body)

    else
      # unhandled iterator
      return
    end

    ret = @iter_break
  ensure
    # also runs on the early return above and when compiling the body raises
    @iter_break = old_ib
  end

  ret || nil.object_id
end
|
|
1596
|
+
end
|
|
1597
|
+
|
|
1598
|
+
# a ruby2c C generator for use in any ruby interpreter (generates C suitable for use as a standard Ruby extension)
# Instead of embedding values read from the running interpreter, the generated code
# resolves constants, interned symbols and C function pointers in a 'do_init_once'
# function called from 'Init_compiledruby'.
class RubyStaticCompiler < RubyLiveCompiler
  # add a new ruby function to the current @cp
  # compiles the instance methods of klass listed in methlist with a compiler
  # instance shared by all class-level calls; returns the class itself
  def self.compile(klass, *methlist)
    @rcp ||= new
    methlist.each { |meth|
      ast = RubyHack.read_method_ast(klass, meth)
      @rcp.compile(ast, klass, meth)
    }
    self
  end

  # same as compile, for singleton methods of klass
  def self.compile_singleton(klass, *methlist)
    @rcp ||= new
    methlist.each { |meth|
      ast = RubyHack.read_singleton_method_ast(klass, meth)
      @rcp.compile(ast, klass, meth, true)
    }
    self
  end

  # returns the C source accumulated by the shared compiler instance,
  # prefixed with a .pt_gnu_stack asm directive for ELF targets
  def self.dump
<<EOS + @rcp.cp.dump_definition('Init_compiledruby')
#ifdef __ELF__
asm .pt_gnu_stack rw;
#endif
EOS
  end

  # returns the C source of this instance: the definition of m (along with
  # do_init_once), or the whole parser content when m is nil/false
  def dump(m="Init_compiledruby")
    m ? @cp.dump_definition(m, 'do_init_once') : @cp.to_s
  end

  # sets up the C parser with an empty 'do_init_once' and the exported
  # 'Init_compiledruby' entrypoint calling it
  def initialize(cp=nil)
    super(cp)

    @cp.parse <<EOS
// static VALUE method(VALUE self, VALUE arg0, VALUE arg1) { return (VALUE)0; }
// static VALUE const_Lol;
static void do_init_once(void) {
// const_Lol = rb_const_get(*rb_cObject, rb_intern("Lol"));
// rb_define_method(const_Lol, "method", method, 2);
}

int Init_compiledruby(void) __attribute__((export)) {
// use a separate func to avoid having to append statements before the 'return'
do_init_once();
return 0;
}
EOS
  end

  # returns the 'do_init_once' function body
  def init
    @cp.toplevel.symbol['do_init_once'].initializer
  end

  # compile one method through the parent class, remember the resulting C function
  # for later direct calls, and register it from do_init_once with
  # rb_define_method / rb_define_singleton_method
  # returns what the parent compile returned (nothing is registered when that is nil/false)
  def compile(ast, klass, method, singleton=false)
    @compiled_func_cache ||= {}

    mname = super(ast, klass, method, singleton)
    return if not mname

    @compiled_func_cache[[klass, method.to_s, singleton]] = @cur_cfunc

    cls = rb_const(nil, klass)

    init.statements << fcall("rb_define#{'_singleton' if singleton}_method", cls, method.to_s, @cur_cfunc, method_arity)

    mname
  end

  # declare a new static toplevel C variable
  # the declaration is inserted right before the first function definition of the
  # toplevel, or appended if there is none
  # if initializer is set, 'name = initializer' is added to do_init_once: appended
  # at the end, except for intern_* variables which are inserted after the leading
  # run of 'var = ...' statements whose variable name sorts before name
  # returns the new C::Variable
  def declare_newtopvar(name, initializer, type=value)
    v = C::Variable.new(name, type)
    v.storage = :static
    @cp.toplevel.symbol[v.name] = v

    top = @cp.toplevel.statements
    first_func = top.find { |st|
      st.kind_of? C::Declaration and st.var.type.kind_of? C::Function and st.var.initializer
    }
    # look the statement up only when one was found: searching for a fallback
    # value would yield a nil position
    pos = (first_func && top.index(first_func)) || -1
    top.insert pos, C::Declaration.new(v)

    if initializer
      pos = -1
      if name =~ /^intern_/
        pos = 0
        init.statements.each { |st|
          break unless st.kind_of? C::CExpression and st.op == :'=' and st.lexpr.kind_of? C::Variable and st.lexpr.name < name
          pos += 1
        }
      end
      init.statements.insert(pos, C::CExpression[v, :'=', initializer])
    end

    v
  end

  # returns the toplevel 'intern_<sym>' variable, declaring it (initialized with
  # rb_intern("sym") in do_init_once) on first use
  def rb_intern(sym)
    n = escape_varname("intern_#{sym}")
    @cp.toplevel.symbol[n] || declare_newtopvar(n, fcall('rb_intern', sym.to_s), C::BaseType.new(:int, :unsigned))
  end

  # rb_const 'FOO', Bar::Baz ==>
  #  const_Bar = rb_const_get(rb_cObject, rb_intern("Bar"));
  #  const_Bar_Baz = rb_const_get(const_Bar, rb_intern("Baz"));
  #  const_Bar_Baz_FOO = rb_const_get(const_Bar_Baz, rb_intern("FOO"));
  # use rb_const(nil, class) to get a pointer to a class/module
  # the variables are cached per [parent module, name] in @const_value
  # raises Fail if the owner of the constant is unknown
  def rb_const(constname, owner = resolve_const_owner(constname))
    raise Fail, "no dynamic constant resolution #{constname}" if not owner

    @const_value ||= { [::Object, 'Object'] => rb_global('rb_cObject') }

    k = ::Object
    v = nil
    cname = owner.name
    cname += '::' + constname if constname
    # walk the path from ::Object, declaring one variable per unseen component
    cname.split('::').each { |n|
      kk = k.const_get(n)
      if not v = @const_value[[k, n]]
        # class A ; end ; B = A  => B.name => 'A'
        vn = "const_#{escape_varname((k.name + '::' + n).sub(/^Object::/, '').gsub('::', '_'))}"
        vi = fcall('rb_const_get', rb_const(nil, k), fcall('rb_intern', n))
        v = declare_newtopvar(vn, vi)
        # n wont be reused, so do not alloc a global intern_#{n} for this
        @const_value[[k, n]] = v
      end
      k = kk
    }
    v
  end

  # TODO remove this when the C compiler is fixed
  # returns the dereference of the toplevel symbol cname
  def rb_global(cname)
    C::CExpression[:*, @cp.toplevel.symbol[cname]]
  end

  # returns a C expression to call klass#method directly:
  # the C function we generated if the method was compiled by this instance,
  # else a toplevel 'fptr_*' variable resolved in do_init_once
  # returns nil if the method has no node in the running interpreter or is not a :cfunc
  def get_cfuncptr(klass, method, singleton=false)
    # is it a func we have in the current cparser ?
    if ptr = @compiled_func_cache[[klass, method.to_s, singleton]]
      return ptr
    end

    # check if it's a C or ruby func in the current interpreter
    cls = singleton ? (class << klass ; self ; end) : klass
    ptr = RubyHack.get_method_node_ptr(cls, method)
    return if ptr == 0
    ftype = RubyHack::NODETYPE[(RubyHack.memory_read_int(ptr) >> 11) & 0xff]
    return if ftype != :cfunc

    # ok, so assume it will be the same next time
    n = escape_varname "fptr_#{klass.name}#{singleton ? '.' : '#'}#{method}".gsub('::', '_')
    if not v = @cp.toplevel.symbol[n]
      v = get_cfuncptr_dyn(klass, method, singleton, n)
    end

    v
  end

  # declare the toplevel function pointer variable n and add to do_init_once the
  # code that reads the method node of klass#method at load time, raises unless
  # it is a :cfunc node, and stores the node's second word in n
  # the prototype of the pointer is derived from the arity of the method in the
  # current interpreter: -1 => (int, VALUE*, VALUE), -2 => (VALUE, VALUE),
  # n => n+1 VALUE arguments
  def get_cfuncptr_dyn(klass, method, singleton, n)
    arity = singleton ? klass.method(method).arity : klass.instance_method(method).arity
    fproto = C::Function.new(value, [])
    case arity
    when -1; fproto.args << C::Variable.new(nil, C::BaseType.new(:int)) << C::Variable.new(nil, C::Pointer.new(value)) << C::Variable.new(nil, value)
    when -2; fproto.args << C::Variable.new(nil, value) << C::Variable.new(nil, value)
    else (arity+1).times { fproto.args << C::Variable.new(nil, value) }
    end

    # 'ptr' is a scratch local of do_init_once shared by all such lookups
    if not ptr = init.symbol['ptr']
      ptr = C::Variable.new('ptr', C::Pointer.new(C::BaseType.new(:int)))
      init.symbol[ptr.name] = ptr
      init.statements << C::Declaration.new(ptr)
    end

    cls = rb_const(nil, klass)
    cls = fcall('rb_singleton_class', cls) if singleton
    init.statements << C::CExpression[ptr, :'=', fcall('rb_method_node', cls, rb_intern(method))]

    # dynamically recheck that klass#method is a :cfunc
    cnd = C::CExpression[[:'!', ptr], :'||', [[[[ptr, :'[]', [0]], :>>, [11]], :&, [0xff]], :'!=', [RubyHack::NODETYPE.index(:cfunc)]]]
    init.statements << C::If.new(cnd, rb_raise("CFunc expected at #{klass}#{singleton ? '.' : '#'}#{method}"), nil)

    vi = C::CExpression[[ptr, :'[]', [1]], C::Pointer.new(fproto)]
    declare_newtopvar(n, vi, C::Pointer.new(fproto))
  end

  # only defined when $trace_rbfuncall is set at the time this class is loaded
  if defined? $trace_rbfuncall and $trace_rbfuncall
    # dynamic trace of all rb_funcall made from our module
    # each generated rb_funcall site gets its own counter, incremented before the
    # call; the non-zero counters are printed by an atexit handler
    def rb_funcall(recv, meth, *args)
      if not defined? @rb_fcid
        @cp.parse <<EOS
int atexit(void(*)(void));
int printf(char*, ...);

static unsigned rb_fcid_max = 1;
static unsigned rb_fcntr[1];

static void rb_fcstat(void)
{
unsigned i;
for (i=0 ; i<rb_fcid_max ; ++i)
if (rb_fcntr[i])
printf("%u %u\\n", i, rb_fcntr[i]);
}
EOS
        @rb_fcid = -1
        @rb_fcntr = @cp.toplevel.symbol['rb_fcntr']
        @rb_fcid_max = @cp.toplevel.symbol['rb_fcid_max']
        init.statements << fcall('atexit', @cp.toplevel.symbol['rb_fcstat'])
      end
      # allocate a new call site id, and grow the C counter array accordingly
      @rb_fcid += 1
      @rb_fcid_max.initializer = C::CExpression[[@rb_fcid+1], @rb_fcid_max.type]
      @rb_fcntr.type.length = @rb_fcid+1

      ctr = C::CExpression[:'++', [@rb_fcntr, :'[]', [@rb_fcid]]]
      C::CExpression[ctr, :',', super(recv, meth, *args)]
    end
  end
end
|
|
1815
|
+
end
|
|
1816
|
+
|
|
1817
|
+
|
|
1818
|
+
|
|
1819
|
+
|
|
1820
|
+
if __FILE__ == $0 or ARGV.delete('ignore_argv0')
|
|
1821
|
+
|
|
1822
|
+
demo = case ARGV.first
|
|
1823
|
+
when nil; :test_jit
|
|
1824
|
+
when 'asm'; :inlineasm
|
|
1825
|
+
when 'generate'; :generate_persistent
|
|
1826
|
+
else :compile_ruby
|
|
1827
|
+
end
|
|
1828
|
+
|
|
1829
|
+
|
|
1830
|
+
case demo
|
|
1831
|
+
when :inlineasm
|
|
1832
|
+
# cnt.times { sys_write str }
|
|
1833
|
+
src_asm = <<EOS
|
|
1834
|
+
mov ecx, [ebp+8]
|
|
1835
|
+
again:
|
|
1836
|
+
push ecx
|
|
1837
|
+
|
|
1838
|
+
mov eax, 4
|
|
1839
|
+
mov ebx, 1
|
|
1840
|
+
mov ecx, [ebp+12]
|
|
1841
|
+
mov edx, [ebp+16]
|
|
1842
|
+
int 80h
|
|
1843
|
+
|
|
1844
|
+
pop ecx
|
|
1845
|
+
loop again
|
|
1846
|
+
EOS
|
|
1847
|
+
|
|
1848
|
+
src = <<EOS
|
|
1849
|
+
#{Metasm::RubyLiveCompiler::RUBY_H}
|
|
1850
|
+
|
|
1851
|
+
void doit(int, char*, int);
|
|
1852
|
+
VALUE foo(VALUE self, VALUE count, VALUE str) {
|
|
1853
|
+
doit(VAL2INT(count), STR_PTR(str), STR_LEN(str));
|
|
1854
|
+
return count;
|
|
1855
|
+
}
|
|
1856
|
+
|
|
1857
|
+
void doit(int count, char *str, int strlen) { asm(#{src_asm.inspect}); }
|
|
1858
|
+
EOS
|
|
1859
|
+
|
|
1860
|
+
class Foo
|
|
1861
|
+
end
|
|
1862
|
+
|
|
1863
|
+
m = Metasm::RubyHack.compile_c(src).encoded
|
|
1864
|
+
|
|
1865
|
+
Metasm::RubyHack.set_method_binary(Foo, 'bar', m, 2)
|
|
1866
|
+
|
|
1867
|
+
Foo.new.bar(4, "blabla\n")
|
|
1868
|
+
Foo.new.bar(2, "foo\n")
|
|
1869
|
+
|
|
1870
|
+
|
|
1871
|
+
when :compile_ruby
|
|
1872
|
+
abort 'need <class#method>' if ARGV.empty?
|
|
1873
|
+
require 'pp'
|
|
1874
|
+
puts '#if 0'
|
|
1875
|
+
ARGV.each { |av|
|
|
1876
|
+
next if not av =~ /^(.*)([.#])(.*)$/
|
|
1877
|
+
cls, sg, meth = $1, $2, $3.to_sym
|
|
1878
|
+
sg = { '.' => true, '#' => false }[sg]
|
|
1879
|
+
cls = cls.split('::').inject(::Object) { |o, cst| o.const_get(cst) }
|
|
1880
|
+
if sg
|
|
1881
|
+
ast = Metasm::RubyHack.read_singleton_method_ast(cls, meth)
|
|
1882
|
+
cls.method(meth) if not ast # raise NoMethodError
|
|
1883
|
+
puts ' --- ast ---'
|
|
1884
|
+
pp ast
|
|
1885
|
+
Metasm::RubyStaticCompiler.compile_singleton(cls, meth)
|
|
1886
|
+
else
|
|
1887
|
+
ast = Metasm::RubyHack.read_method_ast(cls, meth)
|
|
1888
|
+
cls.instance_method(meth) if not ast
|
|
1889
|
+
puts ' --- ast ---'
|
|
1890
|
+
pp ast
|
|
1891
|
+
Metasm::RubyStaticCompiler.compile(cls, meth)
|
|
1892
|
+
end
|
|
1893
|
+
}
|
|
1894
|
+
puts '', ' --- C ---', '#endif'
|
|
1895
|
+
puts Metasm::RubyStaticCompiler.dump
|
|
1896
|
+
|
|
1897
|
+
|
|
1898
|
+
when :test_jit
|
|
1899
|
+
class Foo
|
|
1900
|
+
def bla(x=500)
|
|
1901
|
+
i = 0
|
|
1902
|
+
x.times { i += 16 }
|
|
1903
|
+
i
|
|
1904
|
+
end
|
|
1905
|
+
end
|
|
1906
|
+
|
|
1907
|
+
t0 = Time.now
|
|
1908
|
+
Metasm::RubyLiveCompiler.compile(Foo, :bla)
|
|
1909
|
+
t1 = Time.now
|
|
1910
|
+
ret = Foo.new.bla(0x401_0000)
|
|
1911
|
+
puts ret.to_s(16), ret.class
|
|
1912
|
+
t2 = Time.now
|
|
1913
|
+
|
|
1914
|
+
puts "compile %.3fs run %.3fs" % [t1-t0, t2-t1]
|
|
1915
|
+
|
|
1916
|
+
when :generate_persistent
|
|
1917
|
+
Metasm::RubyStaticCompiler.compile(Metasm::Preprocessor, :getchar, :ungetchar, :unreadtok, :readtok_nopp_str, :readtok_nopp, :readtok)
|
|
1918
|
+
Metasm::RubyStaticCompiler.compile(Metasm::Expression, :reduce_rec, :initialize)
|
|
1919
|
+
Metasm::RubyStaticCompiler.compile_singleton(Metasm::Expression, :[])
|
|
1920
|
+
c_src = Metasm::RubyStaticCompiler.dump
|
|
1921
|
+
File.open('compiledruby.c', 'w') { |fd| fd.puts c_src } if $VERBOSE
|
|
1922
|
+
puts 'compiling..'
|
|
1923
|
+
begin ; require 'compiledruby' ; rescue LoadError ; end
|
|
1924
|
+
# To encode to a different file, you must also rename the Init_compiledruby() function to match the lib name
|
|
1925
|
+
Metasm::ELF.compile_c(Metasm::Ia32.new, c_src).encode_file('compiledruby.so')
|
|
1926
|
+
puts 'ruby -r metasm -r compiledruby ftw'
|
|
1927
|
+
end
|
|
1928
|
+
|
|
1929
|
+
end
|