cast_off 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. data/README +578 -0
  2. data/README.en +256 -0
  3. data/bin/CastOff +145 -0
  4. data/cast_off.gemspec +25 -0
  5. data/ext/cast_off/cast_off.c.rb +1386 -0
  6. data/ext/cast_off/cast_off.h +24 -0
  7. data/ext/cast_off/depend +70 -0
  8. data/ext/cast_off/extconf.rb +19 -0
  9. data/ext/cast_off/generated_c_include/inline_api.h +507 -0
  10. data/ext/cast_off/generated_c_include/iter_api.h +595 -0
  11. data/ext/cast_off/generated_c_include/unbox_api.h.rb +76 -0
  12. data/ext/cast_off/generated_c_include/vm_api.h +751 -0
  13. data/ext/cast_off/ruby_source/atomic.h +56 -0
  14. data/ext/cast_off/ruby_source/constant.h +34 -0
  15. data/ext/cast_off/ruby_source/debug.h +41 -0
  16. data/ext/cast_off/ruby_source/eval_intern.h +234 -0
  17. data/ext/cast_off/ruby_source/gc.h +98 -0
  18. data/ext/cast_off/ruby_source/id.h +175 -0
  19. data/ext/cast_off/ruby_source/insns.inc +179 -0
  20. data/ext/cast_off/ruby_source/insns_info.inc +695 -0
  21. data/ext/cast_off/ruby_source/internal.h +227 -0
  22. data/ext/cast_off/ruby_source/iseq.h +125 -0
  23. data/ext/cast_off/ruby_source/manual_update.h +135 -0
  24. data/ext/cast_off/ruby_source/method.h +105 -0
  25. data/ext/cast_off/ruby_source/node.h +503 -0
  26. data/ext/cast_off/ruby_source/thread_pthread.h +51 -0
  27. data/ext/cast_off/ruby_source/thread_win32.h +40 -0
  28. data/ext/cast_off/ruby_source/vm_core.h +756 -0
  29. data/ext/cast_off/ruby_source/vm_exec.h +184 -0
  30. data/ext/cast_off/ruby_source/vm_insnhelper.c +1748 -0
  31. data/ext/cast_off/ruby_source/vm_insnhelper.h +220 -0
  32. data/ext/cast_off/ruby_source/vm_opts.h +51 -0
  33. data/lib/cast_off.rb +15 -0
  34. data/lib/cast_off/compile.rb +629 -0
  35. data/lib/cast_off/compile/basicblock.rb +144 -0
  36. data/lib/cast_off/compile/cfg.rb +391 -0
  37. data/lib/cast_off/compile/code_manager.rb +284 -0
  38. data/lib/cast_off/compile/configuration.rb +2368 -0
  39. data/lib/cast_off/compile/dependency.rb +240 -0
  40. data/lib/cast_off/compile/information.rb +775 -0
  41. data/lib/cast_off/compile/instruction.rb +446 -0
  42. data/lib/cast_off/compile/ir/call_ir.rb +2348 -0
  43. data/lib/cast_off/compile/ir/guard_ir.rb +423 -0
  44. data/lib/cast_off/compile/ir/jump_ir.rb +223 -0
  45. data/lib/cast_off/compile/ir/operand.rb +934 -0
  46. data/lib/cast_off/compile/ir/param_ir.rb +98 -0
  47. data/lib/cast_off/compile/ir/return_ir.rb +92 -0
  48. data/lib/cast_off/compile/ir/simple_ir.rb +808 -0
  49. data/lib/cast_off/compile/ir/sub_ir.rb +212 -0
  50. data/lib/cast_off/compile/iseq.rb +454 -0
  51. data/lib/cast_off/compile/method_information.rb +1384 -0
  52. data/lib/cast_off/compile/namespace/namespace.rb +556 -0
  53. data/lib/cast_off/compile/namespace/uuid.rb +323 -0
  54. data/lib/cast_off/compile/stack.rb +65 -0
  55. data/lib/cast_off/compile/translator.rb +1562 -0
  56. data/lib/cast_off/suggestion.rb +98 -0
  57. data/lib/cast_off/util.rb +58 -0
  58. metadata +107 -0
@@ -0,0 +1,556 @@
1
+ # coding=utf-8
2
+
3
+ # Ruby to C (and then, to machine executable) compiler, originally written by
4
+ # Urabe Shyouhei <shyouhei@ruby-lang.org> during 2010. See the COPYING for
5
+ # legal info.
6
+
7
+ # This file was split out of compiler.rb.
8
+ #require 'uuid'
9
+ require 'erb'
10
+ require 'tsort'
11
+
12
+ # ``To compile a thing is to manage its namespace.'' -- a nameless developer
13
+ class Namespace
14
+
15
+ # This is used to limit the UUID namespace
16
+ UUIDNS = UUID.parse 'urn:uuid:71614e1a-0cb4-11df-bc41-5769366ff630'
17
+
18
+ # creates a new namespace.
19
# Builds and returns a fresh anonymous subclass of Namespace that acts as one
# independent namespace.  On the returned class, ``new'' is aliased to
# namegen, and the real allocator stays reachable as the private ``old_new''.
#
# namemax:: maximum length of a generated identifier, prefix included.
# prefix::  string put in front of every generated identifier.
#
# Raises ArgumentError when nothing is left after the prefix, or when the
# remaining characters cannot carry a 128-bit UUID in a radix that
# Integer#to_s accepts (36 at most).
def self.new namemax = 31, prefix = 'yarv_'
  limit = namemax - prefix.length
  raise ArgumentError, "offered namespace too narrow" if limit <= 0

  # Each of the _limit_ characters has to carry 128/limit bits, i.e. the
  # number must be printed in radix 2**(128/limit).  The radix itself, not
  # the bit count, is what has to stay within Integer#to_s's limit of 36.
  bpc   = 128.0 / limit
  radix = (2 ** bpc).ceil
  if radix > 36
    raise ArgumentError, "offered namespace too narrow: at least 128bits are needed."
  end

  Class.new self do
    @namemax       = namemax
    @prefix        = prefix
    @limit         = limit
    # true when a textual UUID fits as is, so no radix conversion is needed
    @phasechange   = UUIDNS.to_s.length <= limit
    @radix         = radix
    @desired2names = Hash.new
    @barenames     = Hash.new
    @topology      = Hash.new
    class << self
      alias new namegen
      private
      m = Class.instance_method :new
      define_method :old_new, m
    end
    self
  end
end
49
+
50
+ class << self
51
+ include TSort
52
+
53
+ # This is aliased to class method ``new''. Generates a name unique in
54
+ # this namespace, taking as much as possible from what's _desired_.
55
+ #
56
+ # Note however, that an identical argument _desired_ generates a same
57
+ # name on multiple invocations unless _realuniq_ is true.
58
# Generates a name unique in this namespace, keeping as much of _desired_
# as the namespace allows.  Aliased to ``new'' on concrete namespaces.
#
# The same _desired_ yields the same name on every call unless _realuniq_
# is true, in which case a numeric suffix (2, 3, ...) is appended until the
# candidate is unused.  A candidate longer than the limit is replaced by a
# SHA1-based UUID of _desired_, printed either as is or in the namespace's
# radix, and the generation is retried with that string.
def namegen desired = UUID.new.to_s, realuniq = false
  known = @desired2names.fetch(desired) { Array.new }
  return known.first unless realuniq || known.empty?

  base   = as_tr_cpp desired.to_s
  bare   = base
  serial = 1
  while @barenames.has_key? bare
    serial += 1
    bare = base + serial.to_s
  end

  if bare.length <= @limit
    # short enough: register and hand out
    name = old_new @prefix + bare
    known.push name
    @desired2names.store desired, known
    @barenames.store bare, name
    name
  else
    digest = UUIDNS.new_sha1 desired.to_s
    # prefer the textual UUID when it fits, else fall back to Integer#to_s
    shorter = @phasechange ? digest.to_s : digest.to_i.to_s(@radix)
    new shorter, realuniq
  end
end
87
+
88
+ private
89
+
90
+ # Makes an identifier string corresponding to _name_, which is safe for a
91
+ # C compiler. The name as_tr_cpp was taken from an autoconf macro,
92
+ # AS_TR_CPP().
93
# Returns a copy of _name_, reinterpreted as raw bytes, in which every byte
# that is not an ASCII letter, digit or underscore has become an
# underscore and every run of underscores is collapsed into one.
# _name_ itself is left untouched.
def as_tr_cpp name
  name.dup.
    force_encoding('ASCII-8BIT').
    gsub(%r/[^a-zA-Z0-9_]/m, '_').
    squeeze('_')
end
100
+
101
+ # Details TBW
102
# Collects one C declaration line per registered name and returns them as
# a pair [static_lines, other_lines], each sorted.
#
# Names without a declaration are skipped.  A name whose definition is
# missing, or looks like a function definition (``identifier('' at the
# start of a line), contributes "<declaration> <name>;"; any other name
# contributes its definition verbatim.
def split_decls
  # Declarations do not depend on each other, so they can be sorted.
  declared = @barenames.values.reject { |n| n.declaration.nil? }
  lines = declared.map do |n|
    body = n.definition
    if body.nil? || %r/^[a-zA-Z0-9_]+\(/ === body
      sprintf "%s %s;", n.declaration, n.name
    else
      body
    end
  end
  lines.sort.partition { |e| %r/\Astatic\b/.match e }
end
123
+
124
+ public
125
+
126
+ # Iterates over static declarations
127
# Yields every declaration line that starts with ``static'', in sorted
# order, and returns the array of those lines.  Without a block an
# Enumerator over the same lines is returned instead of raising.
def each_static_decls
  return enum_for(:each_static_decls) unless block_given?
  statics, _others = split_decls
  statics.each { |e| yield e }
end
132
+
133
+ # Iterates over non-static declarations
134
# Yields every declaration line that does not start with ``static'', in
# sorted order, and returns the array of those lines.  Without a block an
# Enumerator over the same lines is returned instead of raising.
def each_nonstatic_decls
  return enum_for(:each_nonstatic_decls) unless block_given?
  _statics, others = split_decls
  others.each { |e| yield e }
end
139
+
140
+ # Iterates over functions
141
# Yields the definition of every registered name whose definition looks
# like a C function, i.e. has ``identifier('' at the start of a line.
# Returns the array of yielded definitions.  Without a block an Enumerator
# is returned instead of raising.
def each_funcs
  return enum_for(:each_funcs) unless block_given?
  # Functions do not depend on each other, so no ordering is imposed.
  funcs = @barenames.values.map { |n| n.definition }
  funcs = funcs.select { |body| %r/^[a-zA-Z0-9_]+\(/.match body }
  funcs.each { |e| yield e }
end
152
+
153
+ # Iterates over initializers
154
# Yields the initialization statement of every registered name that has
# one, in the order produced by tsort_each, so that a name's dependencies
# are initialized before the name itself.  Without a block an Enumerator
# is returned instead of raising.
def each_initializers
  return enum_for(:each_initializers) unless block_given?
  # Initializers do depend on each other.  Order matters here.
  tsort_each do |n|
    stmt = n.initialization
    yield stmt if stmt
  end
end
162
+
163
+ # Atomic each
164
# Yields every name object registered in this namespace.  Without a block
# an Enumerator over the same objects is returned instead of raising.
def each
  return enum_for(:each) unless block_given?
  @barenames.each_value { |n| yield n }
end
169
+
170
+ # tsort's key enumerator
171
# TSort hook: hands every node of the dependency graph, i.e. every
# registered name, to the given block by delegating to each.
def tsort_each_node
  each { |node| yield node }
end
176
+
177
+ # tsort's traversal enumerator
178
# TSort hook: hands every object yielded by _e_'s own ``each'' (its
# dependencies) to the given block.
def tsort_each_child e
  e.each { |child| yield child }
end
183
+ end
184
+
185
+ # I originally implemented this as a simple Struct.new, but I needed some
186
+ # validations over setter methods, so I now have my own impl.
187
+ #
188
+ # A C object has at most four attributes. Normally 3 and 4 are exclusive,
189
+ # but not required to be.
190
+ # 1. A name to refer to that object
191
+ # 2. Type of that object
192
+ # 3. Static definition of that object
193
+ # 4. Dynamic initializer for that object
194
# Sets up a name object: remembers _name_, leaves declaration, definition,
# initialization and expression unset (nil), and starts with an empty
# dependency list.
def initialize name = nil
  @name = name
  %w[@declaration @definition @initialization @expression].each do |ivar|
    instance_variable_set ivar, nil
  end
  @dependencies = Array.new
end
202
+
203
attr_reader :name, :declaration, :definition, :initialization, :dependencies,
            :expression

# Write-once setters for declaration, definition, initialization and
# expression.  The first assignment stores the value; assigning an equal
# value again is a no-op; assigning a different value once one is set
# raises a RuntimeError.
%w[declaration definition initialization expression].each do |attr|
  ivar = "@#{attr}".intern
  define_method "#{attr}=" do |decl|
    current = instance_variable_get ivar
    if !current
      instance_variable_set ivar, decl
    elsif current != decl
      raise \
        "Multiple, though not identical, object #{attr} for "\
        "#{self}:\n\t#{current}\n\t#{decl}"
    end
  end
end
221
+
222
+ # dangerous. do not use this.
223
# Overwrites the declaration with _decl_, bypassing the write-once check,
# and clears the definition so that it can be assigned again.
# Dangerous; do not use this.
def force_set_decl! decl
  @definition  = nil
  @declaration = decl
  nil
end
227
+
228
+ def to_s
229
+ @expression or @name
230
+ end
231
+
232
# Yields every object this one depends on, in the order they were added.
# Without a block an Enumerator over the dependencies is returned instead
# of raising.
def each
  return enum_for(:each) unless block_given?
  @dependencies.each { |dep| yield dep }
end
237
+
238
# Records _obj_ as a dependency of this object and returns _obj_.
def depends obj
  @dependencies << obj
  obj
end
242
+ end
243
+
244
+ # This is a hack to avoid holding the entire output in memory. A YARV-converted C
245
+ # source file can be huge, on the order of megabytes. It is not a wise idea for you
246
+ # to allocate such a large string at once.
247
+ #
248
+ # Taken from: http://d.hatena.ne.jp/m_seki/20100228#1267314143
249
class ERB
  # Configures compiler _c_ so that the generated script appends to the
  # variable named _e_ with ``<<'', creates that variable only when it is
  # not set yet (``||=''), and emits no trailing command.
  def set_eoutvar c, e
    append = "#{e} << "
    c.pre_cmd = ["#{e} ||= ''"]
    c.post_cmd = []
    c.insert_cmd = append
    c.put_cmd = append
  end

  # Evaluates the compiled template inside binding _b_, reporting the file
  # as '(erb)' and numbering lines from 0.
  def trigger b
    b.eval @src, '(erb)', 0
  end
end
260
+
261
+ module Converter
262
+ Quote = Struct.new :unquote # :nodoc:
263
+
264
+ # There are several kinds of literals:
265
+ #
266
+ # - Fixnums, as well as true, false, and nil: they are 100% statically
267
+ # computable while the compilation. No cache needed.
268
+ # - Bignums, Floats, Ranges and Symbols: they are almost static, except for
269
+ # the first time. Suited for caching.
270
+ # - Classes: not computable by the compiler, but once a ruby process boots
271
+ # up, they already are.
272
+ # - Strings: every time a literal is evaluated, a new string object is
273
+ # created. So a cache won't work.
274
+ # - Regexps: almost the same as Strings, except for /.../o, which can be
275
+ # cached.
276
+ # - Arrays and Hashes: they also generate new objects every time, but their
277
+ # contents can happen to be cached.
278
+ #
279
+ # Cached objects can be ``shared'' -- for instance multiple occasions of an
280
+ # identical bignum can and should point to a single address of memory.
281
+ def robject2csource obj, namespace, strmax, volatilep = false, name = nil, contentsp = false # Converts the literal obj into C source; returns either a plain C expression string or an entry allocated from namespace
281
+ decl = 'VALUE' # C type written in the declaration
282
+ vdef = 'Qundef' # value written in the static definition
283
+ init = nil # C expression assigned to the name by its initializer, when set
284
+ deps = Array.new # objects the result depends on
285
+ expr = nil # expression stored on the name, when set
286
+ case obj
287
+ when Quote # hack: the wrapped object's to_s is used verbatim as the name
288
+ name ||= obj.unquote.to_s
289
+ when Fixnum
290
+ name ||= 'LONG2FIX(%d)' % obj
291
+ when TrueClass, FalseClass, NilClass
292
+ name ||= 'Q%p' % obj
293
+ when Bignum
294
+ # Bignums can be large enough to exceed C's string max. Judging from this
295
+ # method's usage, a bignum reaching this stage comes from a bignum
296
+ # literal in Ruby source code, so it is unlikely to be much larger
297
+ # than that, though.
298
+ name ||= namespace.new 'num_' + obj.to_s
299
+ rstr = robject2csource obj.to_s, namespace, strmax, :volatile
300
+ init = sprintf "rb_str2inum(%s, 10)", rstr
301
+ deps << rstr
302
+ when Float
303
+ name ||= namespace.new 'float_' + obj.to_s
304
+ init = sprintf 'rb_float_new(%s)', obj
305
+ when Range
306
+ from = robject2csource obj.begin, namespace, strmax, :volatile
307
+ to = robject2csource obj.end, namespace, strmax, :volatile
308
+ xclp = obj.exclude_end? ? 1 : 0
309
+ init = sprintf "rb_range_new(%s, %s, %d)", from, to, xclp
310
+ name ||= namespace.new
311
+ deps << from << to
312
+ when Class
313
+ # From my investigation of the MRI implementation, these three
314
+ # classes are the only classes that can appear in an instruction
315
+ # sequence. I don't know why, though.
316
+ init = if obj == Object then 'rb_cObject'
317
+ elsif obj == Array then 'rb_cArray'
318
+ elsif obj == StandardError then 'rb_eStandardError'
319
+ else
320
+ raise TypeError, "unknown literal object #{obj}"
321
+ end
322
+ when String
323
+ #if obj.empty?
324
+ ## Empty strings are lightweight enough, do not need encodings.
325
+ #name ||= 'rb_str_new(0, 0)'
326
+ #else
327
+ # As noted elsewhere, Ruby strings can be much longer than
328
+ # C strings can be. Plus a Ruby string has its encoding. So when
329
+ # we reconstruct a Ruby string, we need a set of C strings plus an
330
+ # encoding object.
331
+ #if obj.ascii_only?
332
+ #name ||= $namespace.new 'str_' + obj
333
+ #aenc = Encoding.find 'US-ASCII'
334
+ #encn = robject2csource aenc, namespace, strmax, :volatile
335
+ #else
336
+ name ||= namespace.new 'str_' + obj.encoding.name + '_' + obj
337
+ encn = robject2csource obj.encoding, namespace, strmax, :volatile, nil, true
338
+ #end
339
+ deps << encn
340
+ argv = rstring2cstr obj, strmax
341
+ argv.each do |i| # the first chunk creates the string, later chunks are appended to it
342
+ if init
343
+ x = sprintf ";\nrb_enc_str_buf_cat(%s, %s, %d, %s)",
344
+ name, *i, encn
345
+ init << x
346
+ else
347
+ init = sprintf "rb_enc_str_new(%s, %d, %s)", *i, encn
348
+ end
349
+ end
350
+ if $YARVAOT_DEBUG
351
+ #init << ";\n /* #{obj} */"
352
+ end
353
+ #end
354
+ when Encoding
355
+ # Thank goodness, encoding names are short and will never contain
356
+ # multilingual chars.
357
+ rstr = obj.name
358
+ if contentsp # contentsp selects the raw rb_encoding* rather than its VALUE wrapper
359
+ decl = 'rb_encoding*'
360
+ vdef = '0'
361
+ init = 'rb_enc_find("%s")' % rstr
362
+ name ||= namespace.new 'enc_' + rstr
363
+ else
364
+ encn = robject2csource obj, namespace, strmax, :volatile, nil, true
365
+ deps << encn
366
+ init = 'rb_enc_from_encoding(%s)' % encn
367
+ name ||= namespace.new 'encval_' + rstr
368
+ end
369
+ when Symbol
370
+ str = obj.id2name
371
+ if str.bytesize <= strmax
372
+ # Why is a symbol not cached as a VALUE? Well, a VALUE in a C static
373
+ # variable needs to be scanned during GC because VALUEs can have
374
+ # links against some other objects in general. But that's not the
375
+ # case for Symbols -- they do not have links internally. An ID
376
+ # variable needs no GC because it's clear they are not related to
377
+ # GC at all. So a Symbol is more efficient when stored as an ID,
378
+ # rather than a VALUE.
379
+ a = rstring2cstr str, strmax
380
+ e = robject2csource str.encoding, namespace, strmax, :volatile, nil, true
381
+ name = namespace.new 'sym_' + obj.to_s # NOTE(review): plain assignment, so a name passed by the caller is replaced here -- confirm this is intended
382
+ decl = 'ID'
383
+ vdef = '0'
384
+ init = sprintf 'rb_intern3(%s, %d, %s);', *a[0], e
385
+ expr = 'ID2SYM(%s)' % name.name
386
+ deps << e
387
+ else
388
+ # Longer symbols are much like regexps
389
+ name ||= namespace.new 'sym_' + str
390
+ rstr = robject2csource str, namespace, strmax, :volatile
391
+ init = 'rb_str_intern(%s)' % rstr
392
+ deps << rstr
393
+ end
394
+ when Regexp
395
+ opts = obj.options
396
+ srcs = robject2csource obj.source, namespace, strmax, :volatile
397
+ name ||= namespace.new "reg#{opts}_" + srcs.to_s
398
+ init = sprintf 'rb_reg_new_str(%s, %d)', srcs, opts
399
+ deps << srcs
400
+ when Array
401
+ n = obj.length
402
+ if n == 0
403
+ # zero-length arrays need no cache, because creating such an
404
+ # object is fast enough.
405
+ name ||= 'rb_ary_new2(0)'
406
+ #elsif n == 1
407
+ ## no speedup, but a bit readable output
408
+ #i = obj.first
409
+ #e = robject2csource i, namespace, strmax, :volatile
410
+ #j = as_tr_cpp e.to_s
411
+ #s = 'a' + j
412
+ #name ||= $namespace.new s
413
+ #init = 'rb_ary_new3(1, %s)' % e
414
+ #deps << e
415
+ elsif n <= 30
416
+ # STDC's max # of function arguments is 31, so at most 30 elements
417
+ # are created at once.
418
+ init = 'rb_ary_new3(%d' % obj.length
419
+ obj.each do |x|
420
+ y = robject2csource x, namespace, strmax, :volatile
421
+ init << ",\n " << y.to_s
422
+ deps << y
423
+ end
424
+ init << ')'
425
+ s = init.sub %r/\Arb_ary_new3\(\d+,\s+/, 'a'
426
+ name ||= namespace.new 'ary_' + s
427
+ else
428
+ # Too large to create at once. Feed little by little.
429
+ name ||= namespace.new
430
+ init = 'rb_ary_new()'
431
+ obj.each do |i|
432
+ j = robject2csource i, namespace, strmax, :volatile
433
+ k = sprintf 'rb_ary_push(%s, %s)', name, j
434
+ init << ";\n " << k
435
+ deps << j
436
+ end
437
+ end
438
+ when Hash
439
+ # Hashes are not computable in a single expression...
440
+ name ||= namespace.new
441
+ init = "rb_hash_new()"
442
+ obj.each_pair do |k, v|
443
+ knam = robject2csource k, namespace, strmax, :volatile
444
+ vnam = robject2csource v, namespace, strmax, :volatile
445
+ aset = sprintf 'rb_hash_aset(%s, %s, %s)', name, knam, vnam
446
+ init << ";\n " << aset
447
+ deps << knam << vnam
448
+ end
449
+ else
450
+ raise TypeError, "unknown literal object #{obj.inspect}"
451
+ end
452
+
453
+ name ||= namespace.new init # no branch chose a name: derive one from the initializer text
454
+ case name when namespace # only namespace entries carry declarations; plain strings are returned untouched
455
+ static_decl = "static #{decl}"
456
+ if volatilep and name.declaration == static_decl
457
+ # OK: same object already declared static; keep that declaration
458
+ elsif not volatilep and name.declaration == decl
459
+ # OK: same object already declared non-static; switch it to static
460
+ name.force_set_decl! static_decl
461
+ else
462
+ name.declaration = volatilep ? decl : static_decl
463
+ end
464
+ name.definition = "#{name.declaration} #{name.name} = #{vdef};"
465
+ name.initialization = "#{name.name} = #{init};" if init
466
+ name.expression = expr
467
+ deps.each do |i|
468
+ case i when namespace
469
+ name.dependencies.push i
470
+ end
471
+ end
472
+ end
473
+ return name
474
+ end
476
+
477
+ # For strings even longer than what gen_lenptr handles
478
# Registers one length/pointer entry per chunk of _str_ through
# gen_lenptr.  _str_ is walked line by line and every line is cut into
# C string chunks by rstring2cstr.  A line that fits in a single chunk is
# named "<var>_<line>", otherwise its chunks are named
# "<var>_<line>_<chunk>" (indices in hexadecimal).  Each entry is then
# made to depend on the one generated just before it.
def gen_each_lenptr var, str, strmax
  names = Array.new
  str.each_line.with_index do |line, lineno|
    chunks = rstring2cstr line, strmax
    if chunks.size == 1
      names << gen_lenptr(sprintf('%s_%x', var, lineno), *chunks[0])
    else
      chunks.each_with_index do |chunk, k|
        names << gen_lenptr(sprintf('%s_%x_%x', var, lineno, k), *chunk)
      end
    end
  end
  names.each_cons(2) { |earlier, later| later.depends earlier }
end
498
+
499
+ # Static allocation of a loooooong string
500
# Allocates a name for _var_ from the global $namespace and fills in a
# static ``sourcecode_t'' declaration plus a definition initialized with
# _len_ (printed as hexadecimal) and _ptr_.  Returns the name object.
def gen_lenptr var, ptr, len
  $namespace.new(var).tap do |name|
    name.declaration = "static sourcecode_t #{name}"
    name.definition = format '%s = { %#05x, %s, };',
                             name.declaration, len, ptr
  end
end
507
+
508
+ # Returns a 2-dimensional array [[str, len], [str, len], ... ]
509
+ #
510
+ # This is needed because Ruby's String#dump is different from C's.
511
# Turns _str_ into C string literals, returned as [[literal, bytes], ...].
#
# _str_ is split into lines using separator _rs_ (nil keeps it whole) and
# every line is cut into chunks of at most _strmax_ bytes.  Each chunk
# becomes one literal in which every byte is written as a \x escape, 80
# bytes per quoted row; when a chunk needs several rows each row is
# preceded by a newline and a space.  The second element of each pair is
# the chunk's byte count.  An empty _str_ yields a single empty literal.
def rstring2cstr str, strmax, rs = nil
  return [['""', 0]] if str.empty?

  chunks = str.each_line(rs).flat_map do |line|
    line.each_byte.each_slice(strmax).to_a
  end

  chunks.map do |bytes|
    rows = bytes.each_slice(80).map do |row|
      escaped = row.map { |byte| '\\x%x' % byte }
      "\n " '"' + escaped.join + '"'
    end
    # a lone row does not need the leading line break
    rows.first.strip! if rows.size == 1
    [ rows.join, bytes.size, ]
  end
end
555
+ end
556
+